You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by df...@apache.org on 2002/05/31 05:17:43 UTC
cvs commit: jakarta-oro/src/java/org/apache/oro/text/perl Perl5Util.java
dfs 2002/05/30 20:17:43
Modified: . CHANGES build.properties
src/java/org/apache/oro/text/perl Perl5Util.java
Log:
PR: 8298
Changed behavior of Perl5Util.split() to remove trailing empty fields
in order to comply with Perl5 behavior. Incremented version to 2.0.7-dev-1.
Revision Changes Path
1.27 +7 -1 jakarta-oro/CHANGES
Index: CHANGES
===================================================================
RCS file: /home/cvs/jakarta-oro/CHANGES,v
retrieving revision 1.26
retrieving revision 1.27
diff -u -r1.26 -r1.27
--- CHANGES 30 Mar 2002 04:12:29 -0000 1.26
+++ CHANGES 31 May 2002 03:17:42 -0000 1.27
@@ -1,4 +1,10 @@
-$Id: CHANGES,v 1.26 2002/03/30 04:12:29 dfs Exp $
+$Id: CHANGES,v 1.27 2002/05/31 03:17:42 dfs Exp $
+
+Version 2.x.x
+
+o Changed behavior of Perl5Util.split() to match Perl's behavior, where
+ "leading empty fields are preserved, and empty trailing ones are
+ deleted." Util.split() is left unchanged.
Version 2.0.6
1.4 +2 -2 jakarta-oro/build.properties
Index: build.properties
===================================================================
RCS file: /home/cvs/jakarta-oro/build.properties,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- build.properties 30 Mar 2002 04:14:45 -0000 1.3
+++ build.properties 31 May 2002 03:17:42 -0000 1.4
@@ -1,5 +1,5 @@
# ------------------------------------------------------------------------
-# $Id: build.properties,v 1.3 2002/03/30 04:14:45 dfs Exp $
+# $Id: build.properties,v 1.4 2002/05/31 03:17:42 dfs Exp $
#
# This file controls various properties which may be set during a build.
#
@@ -12,7 +12,7 @@
# Name and version information
name=Jakarta-ORO
project=jakarta-oro
-version=2.0.6
+version=2.0.7-dev-1
# Name and version of the project
project.name=${project}-${version}
1.13 +38 -10 jakarta-oro/src/java/org/apache/oro/text/perl/Perl5Util.java
Index: Perl5Util.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/perl/Perl5Util.java,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- Perl5Util.java 19 Feb 2002 16:16:12 -0000 1.12
+++ Perl5Util.java 31 May 2002 03:17:43 -0000 1.13
@@ -58,7 +58,7 @@
*/
/*
- * $Id: Perl5Util.java,v 1.12 2002/02/19 16:16:12 dfs Exp $
+ * $Id: Perl5Util.java,v 1.13 2002/05/31 03:17:43 dfs Exp $
*/
import java.util.*;
@@ -169,11 +169,16 @@
/** The hashtable to cache higher-level expressions */
private Cache __expressionCache;
/** The pattern matcher to perform matching operations. */
- private Perl5Matcher __matcher = new Perl5Matcher();
+ private Perl5Matcher __matcher;
/** The compiled match expression parsing regular expression. */
private Pattern __matchPattern;
/** The last match from a successful call to a matching method. */
private MatchResult __lastMatch;
+ /**
+ * A container for temporarily holding the results of a split before
+ * deleting trailing empty fields.
+ */
+ private ArrayList __splitList;
/**
* Keeps track of the original input (for postMatch() and preMatch())
@@ -217,6 +222,8 @@
* </pre>
*/
public Perl5Util(PatternCache cache) {
+ __splitList = new ArrayList();
+ __matcher = new Perl5Matcher();
__patternCache = cache;
__expressionCache = new CacheLRU(cache.capacity());
__compilePatterns();
@@ -766,19 +773,25 @@
* <blockquote><pre>
* { "8", "-", "12", ",", "15", ",", "18" }
* </pre></blockquote>
+ * Furthermore, the following Perl behavior is observed: "leading empty
+ * fields are preserved, and empty trailing ones are deleted." This
+ * has the effect that a split on a zero length string returns an empty
+ * list.
* The {@link org.apache.oro.text.regex.Util#split Util.split()} method
- * does NOT implement this particular behavior because it is intended to
- * be usable with Pattern instances other than Perl5Pattern.
+ * does NOT implement these behaviors because it is intended to
+ * be a general self-consistent and predictable split function usable
+ * with Pattern instances other than Perl5Pattern.
* <p>
* @param results
- * A <code> List </code> to which the substrings of the input
+ * A <code> Collection </code> to which the substrings of the input
* that occur between the regular expression delimiter occurrences
* are appended. The input will not be split into any more substrings
* than the specified
* limit. A way of thinking of this is that only the first
* <b>limit - 1</b>
* matches of the delimiting regular expression will be used to split the
- * input.
+ * input. The Collection must support the
+ * <code>addAll(Collection)</code> operation.
* @param pattern The regular expression to use as a split delimiter.
* @param input The String to split.
* @param limit The maximum number of substrings into which the input is split.
@@ -809,20 +822,35 @@
while(--limit != 0 && __matcher.contains(pinput, compiledPattern)) {
currentResult = __matcher.getMatch();
- results.add(input.substring(beginOffset,
- currentResult.beginOffset(0)));
+ __splitList.add(input.substring(beginOffset,
+ currentResult.beginOffset(0)));
+
if((groups = currentResult.groups()) > 1) {
for(index = 1; index < groups; ++index) {
group = currentResult.group(index);
if(group != null && group.length() > 0)
- results.add(group);
+ __splitList.add(group);
}
}
beginOffset = currentResult.endOffset(0);
}
- results.add(input.substring(beginOffset, input.length()));
+ __splitList.add(input.substring(beginOffset, input.length()));
+
+ // Remove all trailing empty fields.
+ for(int i = __splitList.size() - 1; i >= 0; --i) {
+ String str;
+
+ str = (String)__splitList.get(i);
+ if(str.length() == 0)
+ __splitList.remove(i);
+ else
+ break;
+ }
+
+ results.addAll(__splitList);
+ __splitList.clear();
// Just for the sake of completeness
__lastMatch = currentResult;
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>