You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by df...@apache.org on 2002/05/31 05:17:43 UTC

cvs commit: jakarta-oro/src/java/org/apache/oro/text/perl Perl5Util.java

dfs         2002/05/30 20:17:43

  Modified:    .        CHANGES build.properties
               src/java/org/apache/oro/text/perl Perl5Util.java
  Log:
  PR: 8298
  Changed behavior of Perl5Util.split() to remove trailing empty fields
  in order to comply with Perl5 behavior.  Incremented version to 2.0.7-dev-1.
  
  Revision  Changes    Path
  1.27      +7 -1      jakarta-oro/CHANGES
  
  Index: CHANGES
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/CHANGES,v
  retrieving revision 1.26
  retrieving revision 1.27
  diff -u -r1.26 -r1.27
  --- CHANGES	30 Mar 2002 04:12:29 -0000	1.26
  +++ CHANGES	31 May 2002 03:17:42 -0000	1.27
  @@ -1,4 +1,10 @@
  -$Id: CHANGES,v 1.26 2002/03/30 04:12:29 dfs Exp $
  +$Id: CHANGES,v 1.27 2002/05/31 03:17:42 dfs Exp $
  +
  +Version 2.x.x
  +
  +o Changed behavior of Perl5Util.split() to match Perl's behavior, where
  + "leading empty fields are preserved, and empty trailing ones are
  + deleted."  Util.split() is left unchanged.
   
   Version 2.0.6
   
  
  
  
  1.4       +2 -2      jakarta-oro/build.properties
  
  Index: build.properties
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/build.properties,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- build.properties	30 Mar 2002 04:14:45 -0000	1.3
  +++ build.properties	31 May 2002 03:17:42 -0000	1.4
  @@ -1,5 +1,5 @@
   # ------------------------------------------------------------------------
  -# $Id: build.properties,v 1.3 2002/03/30 04:14:45 dfs Exp $
  +# $Id: build.properties,v 1.4 2002/05/31 03:17:42 dfs Exp $
   #
   # This file controls various properties which may be set during a build.
   #
  @@ -12,7 +12,7 @@
   # Name and version information
   name=Jakarta-ORO
   project=jakarta-oro
  -version=2.0.6
  +version=2.0.7-dev-1
   
   # Name and version of the project
   project.name=${project}-${version}
  
  
  
  1.13      +38 -10    jakarta-oro/src/java/org/apache/oro/text/perl/Perl5Util.java
  
  Index: Perl5Util.java
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/perl/Perl5Util.java,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- Perl5Util.java	19 Feb 2002 16:16:12 -0000	1.12
  +++ Perl5Util.java	31 May 2002 03:17:43 -0000	1.13
  @@ -58,7 +58,7 @@
    */
   
   /*
  - * $Id: Perl5Util.java,v 1.12 2002/02/19 16:16:12 dfs Exp $
  + * $Id: Perl5Util.java,v 1.13 2002/05/31 03:17:43 dfs Exp $
    */
   import java.util.*;
   
  @@ -169,11 +169,16 @@
     /** The hashtable to cache higher-level expressions */
     private Cache __expressionCache;
     /** The pattern matcher to perform matching operations. */
  -  private Perl5Matcher __matcher = new Perl5Matcher();
  +  private Perl5Matcher __matcher;
     /** The compiled match expression parsing regular expression. */
     private Pattern __matchPattern;
     /** The last match from a successful call to a matching method. */
     private MatchResult __lastMatch;
  +  /**
  +   * A container for temporarily holding the results of a split before
  +   * deleting trailing empty fields.
  +   */
  +  private ArrayList __splitList;
   
     /**
      * Keeps track of the original input (for postMatch() and preMatch())
  @@ -217,6 +222,8 @@
      * </pre>
      */
     public Perl5Util(PatternCache cache) {
  +    __splitList    = new ArrayList();
  +    __matcher      = new Perl5Matcher();
       __patternCache = cache;
       __expressionCache = new CacheLRU(cache.capacity());
       __compilePatterns();
  @@ -766,19 +773,25 @@
      * <blockquote><pre>
      * { "8", "-", "12", ",", "15", ",", "18" }
      * </pre></blockquote>
  +   * Furthermore, the following Perl behavior is observed: "leading empty
  +   * fields are preserved, and empty trailing ones are deleted."  This
  +   * has the effect that a split on a zero-length string returns an empty
  +   * list.
      * The {@link org.apache.oro.text.regex.Util#split Util.split()} method
  -   * does NOT implement this particular behavior because it is intended to
  -   * be usable with Pattern instances other than Perl5Pattern.
  +   * does NOT implement these behaviors because it is intended to
  +   * be a general self-consistent and predictable split function usable
  +   * with Pattern instances other than Perl5Pattern.
      * <p>
      * @param results 
  -   *    A <code> List </code> to which the substrings of the input
  +   *    A <code> Collection </code> to which the substrings of the input
     *    that occur between the regular expression delimiter occurrences
      *    are appended. The input will not be split into any more substrings
      *    than the specified 
      *    limit. A way of thinking of this is that only the first
      *    <b>limit - 1</b>
      *    matches of the delimiting regular expression will be used to split the
  -   *    input. 
  +   *    input.  The Collection must support the
  +   *    <code>addAll(Collection)</code> operation.
      * @param pattern The regular expression to use as a split delimiter.
      * @param input The String to split.
      * @param limit The limit on the size of the returned <code>Vector</code>.
  @@ -809,20 +822,35 @@
       while(--limit != 0 && __matcher.contains(pinput, compiledPattern)) {
         currentResult = __matcher.getMatch();
   
  -      results.add(input.substring(beginOffset,
  -				  currentResult.beginOffset(0)));
  +      __splitList.add(input.substring(beginOffset,
  +				      currentResult.beginOffset(0)));
  +
         if((groups = currentResult.groups()) > 1) {
   	for(index = 1; index < groups; ++index) {
   	  group = currentResult.group(index);
   	  if(group != null && group.length() > 0)
  -	    results.add(group);
  +	    __splitList.add(group);
   	}
         }
   
         beginOffset = currentResult.endOffset(0);
       }
   
  -    results.add(input.substring(beginOffset, input.length()));
  +    __splitList.add(input.substring(beginOffset, input.length()));
  +
  +    // Remove all trailing empty fields.
  +    for(int i = __splitList.size() - 1; i >= 0; --i) {
  +      String str;
  +
  +      str = (String)__splitList.get(i);
  +      if(str.length() == 0)
  +	__splitList.remove(i);
  +      else
  +	break;
  +    }
  +
  +    results.addAll(__splitList);
  +    __splitList.clear();
   
       // Just for the sake of completeness
       __lastMatch = currentResult;
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>