You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by ba...@apache.org on 2002/07/05 08:33:05 UTC

cvs commit: jakarta-commons-sandbox/lang/src/java/org/apache/commons/lang CharRange.java CharSet.java

bayard      2002/07/04 23:33:05

  Added:       lang/src/java/org/apache/commons/lang CharRange.java
                        CharSet.java
  Log:
  Extracted from Strings.java.
  Methods in Strings.java that apply to CharSet are currently static methods on
  CharSet. Instance/static public/private decisions still to be made.
  
  Revision  Changes    Path
  1.1                  jakarta-commons-sandbox/lang/src/java/org/apache/commons/lang/CharRange.java
  
  Index: CharRange.java
  ===================================================================
  package org.apache.commons.lang;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Commons", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  /**
   * A range of characters: either a single character or a contiguous,
   * inclusive run of an alphabet (for example a through f). A range can also
   * be flagged as negated, meaning "every character except these".
   * Used by CharSet to hold the pieces of a character set.
   *
   * Note that the negated flag is only stored here; inRange does not
   * take it into account, so callers have to combine the two themselves.
   *
   * @author bayard@generationjava.com
   * @version $Id: CharRange.java,v 1.1 2002/07/05 06:33:05 bayard Exp $
   */
  class CharRange {
  
      /**
       * Sentinel meaning "no close character": the NUL character (code 0),
       * which is also the value a char field holds before it is assigned.
       */
      private static final char UNSET = '\u0000';
  
      /** First character of the range (inclusive). */
      private char start;
  
      /** Last character of the range (inclusive), or UNSET for a single character. */
      private char close;
  
      /** Whether this range stands for everything outside of it. */
      private boolean negated;
  
      /**
       * Construct a CharRange over a single character.
       *
       * @param start char over which this range is placed
       */
      public CharRange(char start) {
          this.start = start;
      }
  
      /**
       * Construct a CharRange over a run of characters.
       *
       * @param start char first character in this range (inclusive)
       * @param close char last character in this range (inclusive)
       */
      public CharRange(char start, char close) {
          this.start = start;
          this.close = close;
      }
  
      /**
       * Construct a CharRange over a run of characters, taking the
       * boundaries from the first character of each argument.
       *
       * @param start String whose first character starts this range (inclusive)
       * @param close String whose first character closes this range (inclusive)
       */
      public CharRange(String start, String close) {
          this(start.charAt(0), close.charAt(0));
      }
  
      /**
       * Get the first character of this range.
       *
       * @return char start character
       */
      public char getStart() {
          return start;
      }
  
      /**
       * Get the last character of this range.
       *
       * @return char close character; the NUL character when none was set
       */
      public char getEnd() {
          return close;
      }
  
      /**
       * Set the first character of this range.
       *
       * @param ch char new start character (inclusive)
       */
      public void setStart(char ch) {
          start = ch;
      }
  
      /**
       * Set the last character of this range, turning a single-character
       * range into a multi-character one.
       *
       * @param ch char new close character (inclusive)
       */
      public void setEnd(char ch) {
          close = ch;
      }
  
      /**
       * Is this CharRange over many characters, i.e. has a close
       * character been supplied.
       *
       * @return boolean true if a close character is set
       */
      public boolean isRange() {
          return UNSET != close;
      }
  
      /**
       * Is the passed in character inside this range. The negated flag
       * is not consulted.
       *
       * @param ch char to test
       * @return boolean true if ch lies between start and close (both
       *         inclusive), or equals start when this is a single character
       */
      public boolean inRange(char ch) {
          return isRange() ? (start <= ch && ch <= close) : (ch == start);
      }
  
      /**
       * Is this CharRange negated.
       *
       * @return boolean true if negated
       */
      public boolean isNegated() {
          return negated;
      }
  
      /**
       * Mark this character range as negated or not.
       * A negated CharRange is meant to cover all characters except
       * the ones in this range.
       *
       * @param b boolean true to negate this range
       */
      public void setNegated(boolean b) {
          negated = b;
      }
  
      /**
       * Render this range in set-syntax: an optional leading ^ when
       * negated, the start character, and "-close" when a close
       * character is set.
       *
       * @return String set-syntax form of this range
       */
      public String toString() {
          StringBuffer text = new StringBuffer(4);
          if (negated) {
              text.append('^');
          }
          text.append(start);
          if (isRange()) {
              text.append('-').append(close);
          }
          return text.toString();
      }
  }
  
  
  
  
  
  1.1                  jakarta-commons-sandbox/lang/src/java/org/apache/commons/lang/CharSet.java
  
  Index: CharSet.java
  ===================================================================
  package org.apache.commons.lang;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Commons", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import java.util.Iterator;
  import java.util.List;
  import java.util.LinkedList;
  
  /**
   * A set of characters, built from strings written in a small set-syntax
   * (see evaluateSet) and held as a list of CharRange objects. The set can
   * be asked whether it contains a character, and static helpers are
   * provided to count, delete and squeeze the characters of a String
   * that belong to a set.
   *
   * @author bayard@generationjava.com
   * @version $Id: CharSet.java,v 1.1 2002/07/05 06:33:05 bayard Exp $
   */
  public class CharSet {
  
      /**
       * Squeezes any repetitions of a character that is mentioned in the 
       * supplied set. An example is:
       *    squeeze("hello", {"el"})  => "helo"
       * See evaluateSet for set-syntax.
       *
       * @param str String target to squeeze
       * @param set String[] set of characters to squeeze
       * @return String str with every run of a character from the set
       *         collapsed to a single occurrence
       */
      public static String squeeze(String str, String[] set) {
          CharSet chars = evaluateSet(set);
          StringBuffer buffer = new StringBuffer(str.length());
          char[] chrs = str.toCharArray();
          int sz = chrs.length;
          char lastChar = ' ';
          for(int i=0; i<sz; i++) {
              char ch = chrs[i];
              // i != 0 stops the placeholder value of lastChar from
              // swallowing a leading space that is in the set.
              if( (i != 0) && (ch == lastChar) && chars.contains(ch) ) {
                  continue;
              }
              buffer.append(ch);
              lastChar = ch;
          }
          return buffer.toString();
      }
  
      /**
       * Creates a CharSet object which allows a certain amount of 
       * set logic to be performed upon the following syntax:
       *
       * "aeio" which implies 'a','e',..
       * "^e" implies not e. However it only negates, it's not 
       * a set in itself due to the size of that set in unicode.
       * "ej-m" implies e,j->m. e,j,k,l,m.
       *
       * @param set String[] set-syntax strings to combine
       * @return CharSet holding every range described by the strings
       */
      public static CharSet evaluateSet(String[] set) {
          return new CharSet(set); 
      }
  
      /**
       * Convenience form of count(String, String[]) for a single
       * set-syntax string.
       *
       * @param str String target to count characters in
       * @param set String set of characters to count
       * @return int number of characters of str that are in the set
       */
      public static int count(String str, String set) {
          return count(str, new String[] { set });
      }
  
      /**
       * Takes an argument in set-syntax, see evaluateSet,
       * and returns the number of characters present in the specified string.
       * An example would be:   count("hello", {"c-f","o"}) returns 2.
       *
       * @param str String target to count characters in
       * @param set String[] set of characters to count
       * @return int number of characters of str that are in the set
       */
      public static int count(String str, String[] set) {
          CharSet chars = evaluateSet(set);
          int count = 0;
          char[] chrs = str.toCharArray();
          int sz = chrs.length;
          for(int i=0; i<sz; i++) {
              if(chars.contains(chrs[i])) {
                  count++;
              }
          }
          return count;
      }
  
      /**
       * Convenience form of delete(String, String[]) for a single
       * set-syntax string.
       *
       * @param str String target to delete characters from
       * @param set String set of characters to delete
       * @return String str without the characters that are in the set
       */
      public static String delete(String str, String set) {
          return delete(str, new String[] { set });
      }
  
      /**
       * Takes an argument in set-syntax, see evaluateSet,
       * and deletes any of characters present in the specified string.
       * An example would be:   delete("hello", {"c-f","o"}) returns "hll"
       *
       * @param str String target to delete characters from
       * @param set String[] set of characters to delete
       * @return String str without the characters that are in the set
       */
      public static String delete(String str, String[] set) {
          CharSet chars = evaluateSet(set);
          StringBuffer buffer = new StringBuffer(str.length());
          char[] chrs = str.toCharArray();
          int sz = chrs.length;
          for(int i=0; i<sz; i++) {
              if(!chars.contains(chrs[i])) {
                  buffer.append(chrs[i]);
              }
          }
          return buffer.toString();
      }
  
      /**
       * Convenience form of squeeze(String, String[]) for a single
       * set-syntax string.
       *
       * @param str String target to squeeze
       * @param set String set of characters to squeeze
       * @return String str with runs of set characters collapsed
       */
      public static String squeeze(String str, String set) {
          return squeeze(str, new String[] { set });
      }
  
      // used to be a com.generationjava.collections.typed.TypedList
      // Holds CharRange objects in the order they were added.
      private final List set = new LinkedList();
  
      /**
       * Build a CharSet from a number of set-syntax strings; each string
       * is passed to add in turn.
       *
       * @param set String[] set-syntax strings, see evaluateSet
       */
      public CharSet(String[] set) {
          int sz = set.length;
          for(int i=0; i<sz; i++) {
              add(set[i]);
          }
      }
  
      /**
       * Does this set contain the specified character. The character is
       * contained as soon as one range accepts it: a normal range accepts
       * the characters inside it, a negated range accepts the characters
       * outside it.
       *
       * @param ch char to look for
       * @return boolean true if any range of this set accepts ch
       */
      public boolean contains(char ch) {
          Iterator iterator = set.iterator();
          while(iterator.hasNext()) {
              CharRange range = (CharRange)iterator.next();
              // accepted when "inside" and "negated" disagree
              if(range.isNegated() != range.inRange(ch)) {
                  return true;
              }
          }
          return false;
      }
  
      /**
       * Add the characters described by a set-syntax string to this set.
       * <ul>
       *  <li>A plain character adds that character.</li>
       *  <li>A '-' after a character makes the next character the
       *      inclusive end of a range starting at the earlier one; the
       *      characters following the range are read as new entries.</li>
       *  <li>A '-' with nothing before it to extend (the string "-", or a
       *      dash ahead of the first character) is the literal dash.</li>
       *  <li>A '^' negates every entry added after it from this string.</li>
       *  <li>A trailing '-' with no end character is ignored.</li>
       * </ul>
       *
       * @param str String in set-syntax, see evaluateSet
       */
      public void add(String str) {
          if("-".equals(str)) {
              // a lone dash is the dash character itself
              set.add(new CharRange('-'));
              return;
          } 
  
          int sz = str.length();
          CharRange range = null;   // last entry added; the one a '-' extends
          boolean end = false;      // true when the next char closes a range
          boolean negated = false;
          for(int i=0; i<sz; i++) {
              char ch = str.charAt(i);
              if(ch == '-') {
                  if(range == null) {
                      // no entry to extend yet: treat the dash as a
                      // literal instead of dereferencing a null range
                      CharRange dash = new CharRange(ch);
                      dash.setNegated(negated);
                      set.add(dash);
                  } else {
                      end = true;
                  }
                  continue;
              }
              if(end) {
                  range.setEnd(ch);
                  // the range is complete; later characters are new
                  // entries, not further end characters
                  end = false;
                  continue;
              }
              if(ch == '^') {
                  negated = true;
                  continue;
              }
              range = new CharRange(ch);
              range.setNegated(negated);
              set.add(range);
          }
      }
  
      /**
       * List the ranges of this set, in the order they were added.
       *
       * @return String the string form of the underlying list of ranges
       */
      public String toString() {
          return set.toString();
      }
  
  }
  
  
  
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>