You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@harmony.apache.org by nd...@apache.org on 2009/04/18 20:46:33 UTC
svn commit: r766375 - in
/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex:
MatchResult.java Matcher.java Pattern.java PatternSyntaxException.java
Author: ndbeyer
Date: Sat Apr 18 18:46:32 2009
New Revision: 766375
URL: http://svn.apache.org/viewvc?rev=766375&view=rev
Log:
Apply patch for HARMONY-6157 - Javadocs for java.util.regex.*
Modified:
harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/MatchResult.java
harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Matcher.java
harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java
harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PatternSyntaxException.java
Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/MatchResult.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/MatchResult.java?rev=766375&r1=766374&r2=766375&view=diff
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/MatchResult.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/MatchResult.java Sat Apr 18 18:46:32 2009
@@ -22,45 +22,83 @@
package java.util.regex;
/**
- * @com.intel.drl.spec_ref
- *
+ * Holds the results of a successful match of a {@link Pattern} against a
+ * given string. The result is divided into groups, with one group for each
+ * pair of parentheses in the regular expression and an additional group for
+ * the whole regular expression. The start, end, and contents of each group
+ * can be queried.
+ *
+ * @see Matcher
+ * @see Matcher#toMatchResult()
+ *
* @author Nikolay A. Kuznetsov
* @version $Revision: 1.6.2.2 $
*/
public interface MatchResult {
/**
- * @com.intel.drl.spec_ref
+ * Returns the index of the first character following the text that matched
+ * the whole regular expression.
+ *
+ * @return the character index.
*/
int end();
/**
- * @com.intel.drl.spec_ref
+ * Returns the index of the first character following the text that matched
+ * a given group.
+ *
+ * @param group
+ * the group, ranging from 0 to groupCount(), with 0
+ * representing the whole pattern.
+ *
+ * @return the character index.
*/
int end(int group);
/**
- * @com.intel.drl.spec_ref
+ * Returns the text that matched the whole regular expression.
+ *
+ * @return the text.
*/
String group();
/**
- * @com.intel.drl.spec_ref
+ * Returns the text that matched a given group of the regular expression.
+ *
+ * @param group
+ * the group, ranging from 0 to groupCount(), with 0
+ * representing the whole pattern.
+ *
+ * @return the text that matched the group.
*/
String group(int group);
/**
- * @com.intel.drl.spec_ref
+ * Returns the number of groups in the result, which is always equal to
+ * the number of groups in the original regular expression.
+ *
+ * @return the number of groups.
*/
int groupCount();
/**
- * @com.intel.drl.spec_ref
+ * Returns the index of the first character of the text that matched
+ * the whole regular expression.
+ *
+ * @return the character index.
*/
int start();
/**
- * @com.intel.drl.spec_ref
+ * Returns the index of the first character of the text that matched a given
+ * group.
+ *
+ * @param group
+ * the group, ranging from 0 to groupCount(), with 0
+ * representing the whole pattern.
+ *
+ * @return the character index.
*/
int start(int group);
}
Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Matcher.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Matcher.java?rev=766375&r1=766374&r2=766375&view=diff
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Matcher.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Matcher.java Sat Apr 18 18:46:32 2009
@@ -22,7 +22,31 @@
import org.apache.harmony.regex.internal.nls.Messages;
/**
- * Note: main functionality of this class is hidden into nodes match methods.
+ * Provides a means of matching regular expressions against a given input,
+ * finding occurrences of regular expressions in a given input, or replacing
+ * parts of a given input. A {@code Matcher} instance has an associated {@link
+ * Pattern} instance and an input text. A typical use case is to
+ * iteratively find all occurrences of the {@code Pattern}, until the end of
+ * the input is reached, as the following example illustrates:
+ *
+ * <p/>
+ *
+ * <pre>
+ * Pattern p = Pattern.compile("[A-Za-z]+");
+ *
+ * Matcher m = p.matcher("Hello, Android!");
+ * while (m.find()) {
+ * System.out.println(m.group()); // prints "Hello" and "Android"
+ * }
+ * </pre>
+ *
+ * <p/>
+ *
+ * The {@code Matcher} has a state that results from the previous operations.
+ * For example, it knows whether the most recent attempt to find the
+ * {@code Pattern} was successful and at which position the next attempt would
+ * resume the search. Depending on the application's needs, it may become
+ * necessary to explicitly {@link #reset()} this state from time to time.
*/
public final class Matcher implements MatchResult {
@@ -53,12 +77,25 @@
private ArrayList replacementParts = null;
/**
- * @com.intel.drl.spec_ref
+ * Appends a literal part of the input plus a replacement for the current
+ * match to a given {@link StringBuffer}. The literal part is exactly the
+ * part of the input between the previous match and the current match. The
+ * method can be used in conjunction with {@link #find()} and
+ * {@link #appendTail(StringBuffer)} to walk through the input and replace
+ * all occurrences of the {@code Pattern} with something else.
+ *
+ * @param buffer
+ * the {@code StringBuffer} to append to.
+ * @param replacement
+ * the replacement text.
+ * @return the {@code Matcher} itself.
+ * @throws IllegalStateException
+ * if no successful match has been made.
*/
- public Matcher appendReplacement(StringBuffer sb, String replacement) {
+ public Matcher appendReplacement(StringBuffer buffer, String replacement) {
processedRepl = processReplacement(replacement);
- sb.append(string.subSequence(appendPos, start()));
- sb.append(processedRepl);
+ buffer.append(string.subSequence(appendPos, start()));
+ buffer.append(processedRepl);
appendPos = end();
return this;
}
@@ -148,18 +185,31 @@
}
/**
- * @com.intel.drl.spec_ref
+ * Provides a new input and resets the {@code Matcher}. This results in the
+ * region being set to the whole input. Results of a previous find get lost.
+ * The next attempt to find an occurrence of the {@link Pattern} in the
+ * string will start at the beginning of the input.
+ *
+ * @param input
+ * the new input sequence.
+ *
+ * @return the {@code Matcher} itself.
*/
- public Matcher reset(CharSequence newSequence) {
- if (newSequence == null) {
+ public Matcher reset(CharSequence input) {
+ if (input == null) {
throw new NullPointerException(Messages.getString("regex.01")); //$NON-NLS-1$
}
- this.string = newSequence;
+ this.string = input;
return reset();
}
/**
- * @com.intel.drl.spec_ref
+ * Resets the {@code Matcher}. This results in the region being set to the
+ * whole input. Results of a previous find get lost. The next attempt to
+ * find an occurrence of the {@link Pattern} in the string will start at the
+ * beginning of the input.
+ *
+ * @return the {@code Matcher} itself.
*/
public Matcher reset() {
this.leftBound = 0;
@@ -172,44 +222,57 @@
}
/**
- * @com.intel.drl.spec_ref
+ * Resets this matcher and sets a region. Only characters inside the region
+ * are considered for a match.
+ *
+ * @param start
+ * the first character of the region.
+ * @param end
+ * the first character after the end of the region.
+ * @return the {@code Matcher} itself.
*/
- public Matcher region(int leftBound, int rightBound) {
+ public Matcher region(int start, int end) {
- if (leftBound > rightBound || leftBound < 0 || rightBound < 0
- || leftBound > string.length() || rightBound > string.length()) {
+ if (start > end || start < 0 || end < 0
+ || start > string.length() || end > string.length()) {
throw new IndexOutOfBoundsException( Messages.getString("regex.02", //$NON-NLS-1$
- Integer.toString(leftBound), Integer.toString(rightBound)));
+ Integer.toString(start), Integer.toString(end)));
}
- this.leftBound = leftBound;
- this.rightBound = rightBound;
- matchResult.reset(null, leftBound, rightBound);
+ this.leftBound = start;
+ this.rightBound = end;
+ matchResult.reset(null, start, end);
appendPos = 0;
replacement = null;
return this;
}
- /**
- * TODO: appendTail(StringBuffer) javadoc
- *
- * @param sb
- * @return
- */
- public StringBuffer appendTail(StringBuffer sb) {
- return sb.append(string.subSequence(appendPos, string.length()));
- }
-
- /**
- * This is very similar to replaceAll except only the first occurrence of a
- * sequence matching the pattern is replaced.
- *
- * @param replacement
- * A string to replace occurrences of character sequences
- * matching the pattern.
- * @return A new string with replacements inserted
- */
+ /**
+ * Appends the (unmatched) remainder of the input to the given
+ * {@link StringBuffer}. The method can be used in conjunction with
+ * {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to
+ * walk through the input and replace all matches of the {@code Pattern}
+ * with something else.
+ *
+ * @param buffer
+ * the {@code StringBuffer} to append to.
+ * @return the {@code StringBuffer}.
+ * @throws IllegalStateException
+ * if no successful match has been made.
+ */
+ public StringBuffer appendTail(StringBuffer buffer) {
+ return buffer.append(string.subSequence(appendPos, string.length()));
+ }
+
+ /**
+ * Replaces the first occurrence of this matcher's pattern in the input with
+ * a given string.
+ *
+ * @param replacement
+ * the replacement text.
+ * @return the modified input string.
+ */
public String replaceFirst(String replacement) {
reset();
if (find()) {
@@ -222,16 +285,14 @@
}
- /**
- * Replace all occurrences of character sequences which match the pattern
- * with the given replacement string. The replacement string may refer to
- * capturing groups using the syntax "$<group number>".
- *
- * @param replacement
- * A string to replace occurrences of character sequences
- * matching the pattern.
- * @return A new string with replacements inserted
- */
+ /**
+ * Replaces all occurrences of this matcher's pattern in the input with a
+ * given string.
+ *
+ * @param replacement
+ * the replacement text.
+ * @return the modified input string.
+ */
public String replaceAll(String replacement) {
StringBuffer sb = new StringBuffer();
reset();
@@ -242,40 +303,59 @@
return appendTail(sb).toString();
}
- /**
- * Return a reference to the pattern used by this Matcher.
- *
- * @return A reference to the pattern used by this Matcher.
- */
+ /**
+ * Returns the {@link Pattern} instance used inside this matcher.
+ *
+ * @return the {@code Pattern} instance.
+ */
public Pattern pattern() {
return pat;
}
/**
- * @com.intel.drl.spec_ref
- */
- public String group(int groupIndex) {
- return matchResult.group(groupIndex);
+ * Returns the text that matched a given group of the regular expression.
+ *
+ * @param group
+ * the group, ranging from 0 to groupCount(), with 0
+ * representing the whole pattern.
+ * @return the text that matched the group.
+ * @throws IllegalStateException
+ * if no successful match has been made.
+ */
+ public String group(int group) {
+ return matchResult.group(group);
}
/**
- * @com.intel.drl.spec_ref
+ * Returns the text that matched the whole regular expression.
+ *
+ * @return the text.
+ * @throws IllegalStateException
+ * if no successful match has been made.
*/
public String group() {
return group(0);
}
/**
- * @com.intel.drl.spec_ref
+ * Returns the next occurrence of the {@link Pattern} in the input. The
+ * method starts the search from the given character in the input.
+ *
+ * @param start
+ * The index in the input at which the find operation is to
+ * begin. If this is less than the start of the region, it is
+ * automatically adjusted to that value. If it is beyond the end
+ * of the region, the method will fail.
+ * @return true if (and only if) a match has been found.
*/
- public boolean find(int startIndex) {
+ public boolean find(int start) {
int stringLength = string.length();
- if (startIndex < 0 || startIndex > stringLength)
+ if (start < 0 || start > stringLength)
throw new IndexOutOfBoundsException(Messages.getString("regex.03", //$NON-NLS-1$
- new Integer(startIndex)));
+ new Integer(start)));
- startIndex = findAt(startIndex);
- if (startIndex >= 0 && matchResult.isValid()) {
+ start = findAt(start);
+ if (start >= 0 && matchResult.isValid()) {
matchResult.finalizeMatch();
return true;
}
@@ -294,14 +374,14 @@
return foundIndex;
}
- /**
- * The find() method matches the pattern against the character sequence
- * beginning at the character after the last match or at the beginning of
- * the sequence if called immediately after reset(). The method returns true
- * if and only if a match is found.
- *
- * @return A boolean indicating if the pattern was matched.
- */
+ /**
+ * Returns the next occurrence of the {@link Pattern} in the input. If a
+ * previous match was successful, the method continues the search from the
+ * first character following that match in the input. Otherwise it searches
+ * either from the region start (if one has been set), or from position 0.
+ *
+ * @return true if (and only if) a match has been found.
+ */
public boolean find() {
int length = string.length();
if (!hasTransparentBounds())
@@ -321,45 +401,66 @@
}
/**
- * @com.intel.drl.spec_ref
- */
- public int start(int groupIndex) {
- return matchResult.start(groupIndex);
+ * Returns the index of the first character of the text that matched a given
+ * group.
+ *
+ * @param group
+ * the group, ranging from 0 to groupCount(), with 0
+ * representing the whole pattern.
+ * @return the character index.
+ * @throws IllegalStateException
+ * if no successful match has been made.
+ */
+ public int start(int group) {
+ return matchResult.start(group);
}
/**
- * @com.intel.drl.spec_ref
- */
- public int end(int groupIndex) {
- return matchResult.end(groupIndex);
+ * Returns the index of the first character following the text that matched
+ * a given group.
+ *
+ * @param group
+ * the group, ranging from 0 to groupCount(), with 0
+ * representing the whole pattern.
+ * @return the character index.
+ * @throws IllegalStateException
+ * if no successful match has been made.
+ */
+ public int end(int group) {
+ return matchResult.end(group);
}
- /**
- * This method is identical in function to the Pattern.matches() method. It
- * returns true if and only if the regular expression pattern matches the
- * entire input character sequence.
- *
- * @return A boolean indicating if the pattern matches the entire input
- * character sequence.
- */
+ /**
+ * Tries to match the {@link Pattern} against the entire region (or the
+ * entire input, if no region has been set).
+ *
+ * @return true if (and only if) the {@code Pattern} matches the entire
+ * region.
+ */
public boolean matches() {
return lookingAt(leftBound, Matcher.MODE_MATCH);
}
/**
- * @com.intel.drl.spec_ref
+ * Returns a replacement string for the given one that has all backslashes
+ * and dollar signs escaped.
+ *
+ * @param s
+ * the input string.
+ * @return the input string, with all backslashes and dollar signs having
+ * been escaped.
*/
- public static String quoteReplacement(String string) {
+ public static String quoteReplacement(String s) {
// first check whether we have smth to quote
- if (string.indexOf('\\') < 0 && string.indexOf('$') < 0)
- return string;
- StringBuffer res = new StringBuffer(string.length() * 2);
+ if (s.indexOf('\\') < 0 && s.indexOf('$') < 0)
+ return s;
+ StringBuffer res = new StringBuffer(s.length() * 2);
char ch;
- int len = string.length();
+ int len = s.length();
for (int i = 0; i < len; i++) {
- switch (ch = string.charAt(i)) {
+ switch (ch = s.charAt(i)) {
case '$':
res.append('\\');
res.append('$');
@@ -392,15 +493,13 @@
return false;
}
- /**
- * This method attempts to match the pattern against the character sequence
- * starting at the beginning. If the pattern matches even a prefix of the
- * input character sequence, lookingAt() will return true. Otherwise it will
- * return false.
- *
- * @return A boolean indicating if the pattern matches a prefix of the input
- * character sequence.
- */
+ /**
+ * Tries to match the {@link Pattern}, starting from the beginning of the
+ * region (or the beginning of the input, if no region has been set).
+ * Doesn't require the {@code Pattern} to match against the whole region.
+ *
+ * @return true if (and only if) the {@code Pattern} matches.
+ */
public boolean lookingAt() {
return lookingAt(leftBound, Matcher.MODE_FIND);
}
@@ -413,37 +512,61 @@
}
/**
- * @com.intel.drl.spec_ref
+ * Returns the index of the first character of the text that matched the
+ * whole regular expression.
+ *
+ * @return the character index.
+ * @throws IllegalStateException
+ * if no successful match has been made.
*/
public int start() {
return start(0);
}
- /**
- * Return the number of capturing groups in the pattern.
- *
- * @return The number of capturing groups in the pattern.
- */
+ /**
+ * Returns the number of groups in the results, which is always equal to
+ * the number of groups in the original regular expression.
+ *
+ * @return the number of groups.
+ */
public int groupCount() {
return matchResult.groupCount();
}
/**
- * @com.intel.drl.spec_ref
+ * Returns the index of the first character following the text that matched
+ * the whole regular expression.
+ *
+ * @return the character index.
+ * @throws IllegalStateException
+ * if no successful match has been made.
*/
public int end() {
return end(0);
}
/**
- * @com.intel.drl.spec_ref
+ * Converts the current match into a separate {@link MatchResult} instance
+ * that is independent from this matcher. The new object is unaffected when
+ * the state of this matcher changes.
+ *
+ * @return the new {@code MatchResult}.
+ * @throws IllegalStateException
+ * if no successful match has been made.
*/
public MatchResult toMatchResult() {
return this.matchResult.cloneImpl();
}
/**
- * @com.intel.drl.spec_ref
+ * Determines whether this matcher has anchoring bounds enabled or not. When
+ * anchoring bounds are enabled, the start and end of the input match the
+ * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
+ * by default.
+ *
+ * @param value
+ * the new value for anchoring bounds.
+ * @return the {@code Matcher} itself.
*/
public Matcher useAnchoringBounds(boolean value) {
matchResult.useAnchoringBounds(value);
@@ -451,14 +574,26 @@
}
/**
- * @com.intel.drl.spec_ref
+ * Indicates whether this matcher has anchoring bounds enabled. When
+ * anchoring bounds are enabled, the start and end of the input match the
+ * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
+ * by default.
+ *
+ * @return true if (and only if) the {@code Matcher} uses anchoring bounds.
*/
public boolean hasAnchoringBounds() {
return matchResult.hasAnchoringBounds();
}
/**
- * @com.intel.drl.spec_ref
+ * Determines whether this matcher has transparent bounds enabled or not.
+ * When transparent bounds are enabled, the parts of the input outside the
+ * region are subject to lookahead and lookbehind, otherwise they are not.
+ * Transparent bounds are disabled by default.
+ *
+ * @param value
+ * the new value for transparent bounds.
+ * @return the {@code Matcher} itself.
*/
public Matcher useTransparentBounds(boolean value) {
matchResult.useTransparentBounds(value);
@@ -466,53 +601,77 @@
}
/**
- * @com.intel.drl.spec_ref
+ * Indicates whether this matcher has transparent bounds enabled. When
+ * transparent bounds are enabled, the parts of the input outside the region
+ * are subject to lookahead and lookbehind, otherwise they are not.
+ * Transparent bounds are disabled by default.
+ *
+ * @return true if (and only if) the {@code Matcher} uses transparent bounds.
*/
public boolean hasTransparentBounds() {
return matchResult.hasTransparentBounds();
}
/**
- * @com.intel.drl.spec_ref
+ * Returns this matcher's region start, that is, the first character that is
+ * considered for a match.
+ *
+ * @return the start of the region.
*/
public int regionStart() {
return matchResult.getLeftBound();
}
/**
- * @com.intel.drl.spec_ref
+ * Returns this matcher's region end, that is, the first character that is
+ * not considered for a match.
+ *
+ * @return the end of the region.
*/
public int regionEnd() {
return matchResult.getRightBound();
}
/**
- * @com.intel.drl.spec_ref
+ * Indicates whether more input might change a successful match into an
+ * unsuccessful one.
+ *
+ * @return true if (and only if) more input might change a successful match
+ * into an unsuccessful one.
*/
public boolean requireEnd() {
return matchResult.requireEnd;
}
/**
- * @com.intel.drl.spec_ref
+ * Indicates whether the last match hit the end of the input.
+ *
+ * @return true if (and only if) the last match hit the end of the input.
*/
public boolean hitEnd() {
return matchResult.hitEnd;
}
/**
- * @com.intel.drl.spec_ref
+ * Sets a new pattern for the {@code Matcher}. Results of a previous find
+ * get lost. The matcher's current position in the input is kept, so the
+ * next attempt to find the {@link Pattern} resumes from that position.
+ *
+ * @param pattern
+ * the new {@code Pattern}.
+ *
+ * @return the {@code Matcher} itself.
*/
- public Matcher usePattern(Pattern pat) {
- if (pat == null) {
+ public Matcher usePattern(Pattern pattern) {
+ if (pattern == null) {
throw new IllegalArgumentException(Messages.getString("regex.1B"));
}
int startIndex = matchResult.getPreviousMatchEnd();
int mode = matchResult.mode();
- this.pat = pat;
- this.start = pat.start;
+ this.pat = pattern;
+ this.start = pattern.start;
matchResult = new MatchResultImpl(this.string, leftBound, rightBound,
- pat.groupCount(), pat.compCount(), pat.consCount());
+ pattern.groupCount(), pattern.compCount(), pattern.consCount());
matchResult.setStartIndex(startIndex);
matchResult.setMode(mode);
return this;
Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java?rev=766375&r1=766374&r2=766375&view=diff
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java Sat Apr 18 18:46:32 2009
@@ -29,12 +29,33 @@
/**
- * Pattern implements a compiler for regular expressions as defined by the J2SE
- * specification. The regular expression syntax is largely similar to the syntax
- * defined by Perl 5 but has both omissions and extensions. A formal and
- * complete definition of the regular expression syntax is not provided by the
- * J2SE speTBD (TODO)
- *
+ * Represents a pattern used for matching, searching, or replacing strings.
+ * {@code Pattern}s are specified in terms of regular expressions and compiled
+ * using an instance of this class. They are then used in conjunction with a
+ * {@link Matcher} to perform the actual search.
+ * <p/>
+ * A typical use case looks like this:
+ * <p/>
+ * <pre>
+ * Pattern p = Pattern.compile("Hello, A[a-z]*!");
+ *
+ * Matcher m = p.matcher("Hello, Android!");
+ * boolean b1 = m.matches(); // true
+ *
+ * m.reset("Hello, Robot!");
+ * boolean b2 = m.matches(); // false
+ * </pre>
+ * <p/>
+ * The above code could also be written in a more compact fashion, though
+ * this variant is less efficient, since {@code Pattern} and
+ * {@code Matcher} objects are created on the fly instead of being
+ * reused:
+ * <pre>
+ * boolean b1 = Pattern.matches("Hello, A[a-z]*!", "Hello, Android!"); // true
+ * boolean b2 = Pattern.matches("Hello, A[a-z]*!", "Hello, Robot!"); // false
+ * </pre>
+ *
+ * @see Matcher
*/
public final class Pattern implements Serializable {
@@ -43,42 +64,56 @@
static final boolean _DEBUG_ = false;
/**
- * @com.intel.drl.spec_ref
+ * This constant specifies that only Unix line endings ('\n') are recognized
+ * as line terminators by the '.', '^', and '$' meta characters.
*/
public static final int UNIX_LINES = 1 << 0;
/**
- * @com.intel.drl.spec_ref
+ * This constant specifies that a {@code Pattern} is matched
+ * case-insensitively. That is, the patterns "a+" and "A+" would both match
+ * the string "aAaAaA".
*/
public static final int CASE_INSENSITIVE = 1 << 1;
/**
- * @com.intel.drl.spec_ref
+ * This constant specifies that a {@code Pattern} may contain whitespace or
+ * comments. Otherwise comments and whitespace are taken as literal
+ * characters.
*/
public static final int COMMENTS = 1 << 2;
/**
- * @com.intel.drl.spec_ref
+ * This constant specifies that the meta characters '^' and '$' match only
+ * the beginning and end of an input line, respectively. Normally, they
+ * match the beginning and the end of the complete input.
*/
public static final int MULTILINE = 1 << 3;
/**
- * @com.intel.drl.spec_ref
+ * This constant specifies that the whole {@code Pattern} is to be taken
+ * literally, that is, all meta characters lose their meanings.
*/
public static final int LITERAL = 1 << 4;
/**
- * @com.intel.drl.spec_ref
+ * This constant specifies that the '.' meta character matches arbitrary
+ * characters, including line endings, which is normally not the case.
*/
public static final int DOTALL = 1 << 5;
/**
- * @com.intel.drl.spec_ref
+ * This constant specifies that a {@code Pattern} is matched
+ * case-insensitively with regard to all Unicode characters. It is used in
+ * conjunction with the {@link #CASE_INSENSITIVE} constant to extend its
+ * meaning to all Unicode characters.
*/
public static final int UNICODE_CASE = 1 << 6;
/**
- * @com.intel.drl.spec_ref
+ * This constant specifies that a character in a {@code Pattern} and a
+ * character in the input string only match if they are canonically
+ * equivalent.
*/
public static final int CANON_EQ = 1 << 7;
@@ -128,45 +163,64 @@
transient AbstractSet start = null;
- /**
- * Create a matcher for this pattern and a given input character sequence
- *
- * @param cs
- * The input character sequence
- * @return A new matcher
- */
- public Matcher matcher(CharSequence cs) {
- return new Matcher(this, cs);
- }
-
- /**
- * Split an input string using the pattern as a token separator.
- *
- * @param input
- * Input sequence to tokenize
- * @param limit
- * If positive, the maximum number of tokens to return. If
- * negative, an indefinite number of tokens are returned. If
- * zero, an indefinite number of tokens are returned but trailing
- * empty tokens are excluded.
- * @return A sequence of tokens split out of the input string.
- */
- public String[] split(CharSequence input, int limit) {
+ /**
+ * Returns a {@link Matcher} for the {@code Pattern} and a given input. The
+ * {@code Matcher} can be used to match the {@code Pattern} against the
+ * whole input, find occurrences of the {@code Pattern} in the input, or
+ * replace parts of the input.
+ *
+ * @param input
+ * the input to process.
+ *
+ * @return the resulting {@code Matcher}.
+ */
+ public Matcher matcher(CharSequence input) {
+ return new Matcher(this, input);
+ }
+
+ /**
+ * Splits the given input sequence around occurrences of the {@code Pattern}.
+ * The function first determines all occurrences of the {@code Pattern}
+ * inside the input sequence. It then builds an array of the
+ * "remaining" strings before, in-between, and after these
+ * occurrences. An additional parameter determines the maximal number of
+ * entries in the resulting array and the handling of trailing empty
+ * strings.
+ *
+ * @param inputSeq
+ * the input sequence.
+ * @param limit
+ * Determines the maximal number of entries in the resulting
+ * array.
+ * <ul>
+ * <li>For limit > 0, it is guaranteed that the resulting array
+ * contains at most limit entries.
+ * <li>For limit < 0, the length of the resulting array is
+ * exactly the number of occurrences of the {@code Pattern} +1.
+ * All entries are included.
+ * <li>For limit == 0, the length of the resulting array is at most
+ * the number of occurrences of the {@code Pattern} +1. Empty
+ * strings at the end of the array are not included.
+ * </ul>
+ *
+ * @return the resulting array.
+ */
+ public String[] split(CharSequence inputSeq, int limit) {
ArrayList res = new ArrayList();
- Matcher mat = matcher(input);
+ Matcher mat = matcher(inputSeq);
int index = 0;
int curPos = 0;
- if (input.length() == 0) {
+ if (inputSeq.length() == 0) {
return new String [] {""}; //$NON-NLS-1$
} else {
while (mat.find() && (index + 1 < limit || limit <= 0)) {
- res.add(input.subSequence(curPos, mat.start()).toString());
+ res.add(inputSeq.subSequence(curPos, mat.start()).toString());
curPos = mat.end();
index++;
}
- res.add(input.subSequence(curPos, input.length()).toString());
+ res.add(inputSeq.subSequence(curPos, inputSeq.length()).toString());
index++;
/*
@@ -182,95 +236,79 @@
}
/**
- * @com.intel.drl.spec_ref
+ * Splits a given input around occurrences of a regular expression. This is
+ * a convenience method that is equivalent to calling the method
+ * {@link #split(java.lang.CharSequence, int)} with a limit of 0.
+ *
+ * @param input
+ * the input sequence.
+ *
+ * @return the resulting array.
*/
public String[] split(CharSequence input) {
return split(input, 0);
}
- /**
- * Returns the pattern string passed to the compile method
- *
- * @return A string representation of the pattern
- */
+ /**
+ * Returns the regular expression that was compiled into this
+ * {@code Pattern}.
+ *
+ * @return the regular expression.
+ */
public String pattern() {
return lexemes.toString();
}
- /**
- * Return a textual representation of the pattern.
- *
- * @return The regular expression string
- */
+ @Override
public String toString() {
return this.pattern();
}
- /**
- * Return the mask of flags used to compile the pattern
- *
- * @return A mask of flags used to compile the pattern.
- */
+ /**
+ * Returns the flags that have been set for this {@code Pattern}.
+ *
+ * @return the flags that have been set. A combination of the constants
+ * defined in this class.
+ *
+ * @see #CANON_EQ
+ * @see #CASE_INSENSITIVE
+ * @see #COMMENTS
+ * @see #DOTALL
+ * @see #LITERAL
+ * @see #MULTILINE
+ * @see #UNICODE_CASE
+ * @see #UNIX_LINES
+ */
public int flags() {
return this.flags;
}
- /**
- * Return a compiled pattern corresponding to the input regular expression
- * string.
- *
- * The input <code>flags</code> is a mask of the following flags:
- * <dl>
- * <dt><code>UNIX_LINES</code> (0x0001)
- * <dd>Enables UNIX lines mode where only \n is recognized as a line
- * terminator. The default setting of this flag is <em>off</em> indicating
- * that all of the following character sequences are recognized as line
- * terminators: \n, \r, \r\n, NEL (\u0085), \u2028 and \u2029.
- * <dt><code>CASE_INSENSITIVE</code> (0x0002)
- * <dd>Directs matching to be done in a way that ignores differences in
- * case. If input character sequences are encoded in character sets other
- * than ASCII, then the UNICODE_CASE must also be set to enable Unicode case
- * detection.
- * <dt><code>UNICODE_CASE</code> (0x0040)
- * <dd>Enables Unicode case folding if used in conjunction with the
- * <code>CASE_INSENSITIVE</code> flag. If <code>CASE_INSENSITIVE</code>
- * is not set, then this flag has no effect.
- * <dt><code>COMMENTS</code> (0x0004)
- * <dd>Directs the pattern compiler to ignore whitespace and comments in
- * the pattern. Whitespace consists of sequences including only these
- * characters: SP (\u0020), HT (\t or \u0009), LF (\n or ), VT
- * (\u000b), FF (\f or \u000c), and CR (\r or ). A comment is any
- * sequence of characters beginning with the "#" (\u0023) character and
- * ending in a LF character.
- * <dt><code>MULTILINE</code> (0x0008)
- * <dd>Turns on multiple line mode for matching of character sequences. By
- * default, this mode is off so that the character "^" (\u005e) matches
- * the beginning of the entire input sequence and the character "$"
- * (\u0024) matches the end of the input character sequence. In multiple
- * line mode, the character "^" matches any character in the input sequence
- * which immediately follows a line terminator and the character "$" matches
- * any character in the input sequence which immediately precedes a line
- * terminator.
- * <dt><code>DOTALL</code> (0x0020)
- * <dd>Enables the DOT (".") character in regular expressions to match line
- * terminators. By default, line terminators are not matched by DOT.
- * <dt><code>CANON_EQ</code> (0x0080)
- * <dd>Enables matching of character sequences which are canonically
- * equivalent according to the Unicode standard. Canonical equivalence is
- * described here: http://www.unicode.org/reports/tr15/. By default,
- * canonical equivalence is not detected while matching.
- * </dl>
- *
- * @param regex
- * A regular expression string.
+ /**
+ * Compiles a regular expression, creating a new {@code Pattern} instance in
+ * the process. The supplied flags modify the behavior of the
+ * {@code Pattern}.
+ *
+ * @param pattern
+ * the regular expression.
* @param flags
- * A set of flags to control the compilation of the pattern.
- * @return A compiled pattern
+ * the flags to set. Basically, any combination of the constants
+ * defined in this class is valid.
+ *
+ * @return the new {@code Pattern} instance.
+ *
* @throws PatternSyntaxException
- * If the input regular expression does not match the required
- * grammar.
+ * if the regular expression is syntactically incorrect.
+ *
+ * @see #CANON_EQ
+ * @see #CASE_INSENSITIVE
+ * @see #COMMENTS
+ * @see #DOTALL
+ * @see #LITERAL
+ * @see #MULTILINE
+ * @see #UNICODE_CASE
+ * @see #UNIX_LINES
*/
- public static Pattern compile(String regex, int flags)
+ public static Pattern compile(String pattern, int flags)
throws PatternSyntaxException {
if ((flags != 0) &&
@@ -281,7 +319,7 @@
AbstractSet.counter = 1;
- return new Pattern().compileImpl(regex, flags);
+ return new Pattern().compileImpl(pattern, flags);
}
/**
@@ -294,11 +332,11 @@
*
* @return Compiled pattern
*/
- private Pattern compileImpl(String regex, int flags)
+ private Pattern compileImpl(String pattern, int flags)
throws PatternSyntaxException {
- this.lexemes = new Lexer(regex, flags);
+ this.lexemes = new Lexer(pattern, flags);
this.flags = flags;
- this.pattern = regex;
+ this.pattern = pattern;
start = processExpression(-1, this.flags, null);
if (!lexemes.isEmpty()) {
@@ -1276,9 +1314,19 @@
return new UCIRangeSet(charClass);
}
}
-
+
/**
- * @com.intel.drl.spec_ref
+ * Compiles a regular expression, creating a new Pattern instance in the
+ * process. This is actually a convenience method that calls {@link
+ * #compile(String, int)} with a {@code flags} value of zero.
+ *
+ * @param pattern
+ * the regular expression.
+ *
+ * @return the new {@code Pattern} instance.
+ *
+ * @throws PatternSyntaxException
+ * if the regular expression is syntactically incorrect.
*/
public static Pattern compile(String pattern) {
return compile(pattern, 0);
@@ -1298,14 +1346,39 @@
}
}
-
+
/**
- * @com.intel.drl.spec_ref
+ * Tries to match a given regular expression against a given input. This is
+ * actually nothing but a convenience method that compiles the regular
+ * expression into a {@code Pattern}, builds a {@link Matcher} for it, and
+ * then does the match. If the same regular expression is used for multiple
+ * operations, it is recommended to compile it into a {@code Pattern}
+ * explicitly and request a reusable {@code Matcher}.
+ *
+ * @param regex
+ * the regular expression.
+ * @param input
+ * the input to process.
+ *
+ * @return true if and only if the {@code Pattern} matches the input.
+ *
+ * @see Pattern#compile(java.lang.String, int)
+ * @see Matcher#matches()
*/
public static boolean matches(String regex, CharSequence input) {
return Pattern.compile(regex).matcher(input).matches();
}
+ /**
+ * Quotes a given string using "\Q" and "\E", so that all other
+ * meta-characters lose their special meaning. If the string is used for a
+ * {@code Pattern} afterwards, it can only be matched literally.
+ *
+ * @param s
+ * the string to quote.
+ *
+ * @return the quoted string.
+ */
public static String quote(String s) {
StringBuffer sb = new StringBuffer().append("\\Q"); //$NON-NLS-1$
int apos = 0;
Modified: harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PatternSyntaxException.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PatternSyntaxException.java?rev=766375&r1=766374&r2=766375&view=diff
==============================================================================
--- harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PatternSyntaxException.java (original)
+++ harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PatternSyntaxException.java Sat Apr 18 18:46:32 2009
@@ -26,39 +26,74 @@
import org.apache.harmony.regex.internal.nls.Messages;
/**
- * @com.intel.drl.spec_ref
- *
+ * Encapsulates a syntax error that occurred during the compilation of a
+ * {@link Pattern}. Might include a detailed description, the original regular
+ * expression, and the index at which the error occurred.
+ *
+ * @see Pattern#compile(String)
+ * @see Pattern#compile(java.lang.String,int)
+ *
* @author Nikolay A. Kuznetsov
* @version $Revision: 1.7.2.2 $
*/
public class PatternSyntaxException extends IllegalArgumentException {
private static final long serialVersionUID = -3864639126226059218L;
-
+
+ /**
+ * Holds the description of the syntax error, or null if the description is
+ * not known.
+ */
private String desc;
-
+
+ /**
+ * Holds the syntactically incorrect regular expression, or null if the
+ * regular expression is not known.
+ */
private String pattern;
-
+
+ /**
+ * Holds the index around which the error occurred, or -1, in case it is
+ * unknown.
+ */
private int index = -1;
/**
- * @com.intel.drl.spec_ref
+ * Creates a new PatternSyntaxException for a given message, pattern, and
+ * error index.
+ *
+ * @param description
+ * the description of the syntax error, or {@code null} if the
+ * description is not known.
+ * @param pattern
+ * the syntactically incorrect regular expression, or
+ * {@code null} if the regular expression is not known.
+ * @param index
+ * the character index around which the error occurred, or -1 if
+ * the index is not known.
*/
- public PatternSyntaxException(String desc, String pattern, int index) {
- this.desc = desc;
+ public PatternSyntaxException(String description, String pattern, int index) {
+ this.desc = description;
this.pattern = pattern;
this.index = index;
}
/**
- * @com.intel.drl.spec_ref
+ * Returns the syntactically incorrect regular expression.
+ *
+ * @return the regular expression.
+ *
*/
public String getPattern() {
return pattern;
}
/**
- * @com.intel.drl.spec_ref
+ * Returns a detailed error message for the exception. The message is
+ * potentially multi-line, and it might include a detailed description, the
+ * original regular expression, and the index at which the error occurred.
+ *
+ * @return the error message.
*/
public String getMessage() {
String filler = ""; //$NON-NLS-1$
@@ -73,14 +108,21 @@
}
/**
- * @com.intel.drl.spec_ref
+ * Returns the description of the syntax error, or {@code null} if the
+ * description is not known.
+ *
+ * @return the description.
*/
public String getDescription() {
return desc;
}
/**
- * @com.intel.drl.spec_ref
+ * Returns the character index around which the error occurred, or -1 if the
+ * index is not known.
+ *
+ * @return the index.
+ *
*/
public int getIndex() {
return index;