You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by ba...@apache.org on 2002/07/05 08:33:05 UTC
cvs commit: jakarta-commons-sandbox/lang/src/java/org/apache/commons/lang CharRange.java CharSet.java
bayard 2002/07/04 23:33:05
Added: lang/src/java/org/apache/commons/lang CharRange.java
CharSet.java
Log:
Extracted from Strings.java.
Methods in Strings.java that apply to CharSet are currently static methods on
CharSet. Instance/static public/private decisions still to be made.
Revision Changes Path
1.1 jakarta-commons-sandbox/lang/src/java/org/apache/commons/lang/CharRange.java
Index: CharRange.java
===================================================================
package org.apache.commons.lang;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowlegement:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
* 4. The names "The Jakarta Project", "Commons", and "Apache Software
* Foundation" must not be used to endorse or promote products derived
* from this software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* nor may "Apache" appear in their names without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/**
 * A range of characters. Models a contiguous run of an alphabet
 * (from a start character to a close character, both inclusive),
 * or a single character, together with a "negated" marker.
 * Used by CharSet to handle sets of characters.
 *
 * @author bayard@generationjava.com
 * @version $Id: CharRange.java,v 1.1 2002/07/05 06:33:05 bayard Exp $
 */
class CharRange {

    /**
     * Sentinel used internally to represent "no value" in a char.
     * A close character equal to this value means the range covers
     * only its start character.
     */
    private static final char UNSET = '\u0000';

    /** First character of the range (inclusive). */
    private char start;

    /** Last character of the range (inclusive), or UNSET. */
    private char close;

    /** Whether this range has been marked as negated. */
    private boolean negated;

    /**
     * Construct a CharRange over a single character.
     *
     * @param start char over which this range is placed
     */
    public CharRange(char start) {
        this.start = start;
    }

    /**
     * Construct a CharRange over a run of characters.
     *
     * @param start char first character in this range (inclusive)
     * @param close char last character in this range (inclusive)
     */
    public CharRange(char start, char close) {
        this.start = start;
        this.close = close;
    }

    /**
     * Construct a CharRange over a run of characters, taking the
     * boundaries from the first character of each argument.
     *
     * @param start String whose first character starts the range (inclusive)
     * @param close String whose first character closes the range (inclusive)
     */
    public CharRange(String start, String close) {
        this.start = start.charAt(0);
        this.close = close.charAt(0);
    }

    /**
     * @return char the start character of this range
     */
    public char getStart() {
        return start;
    }

    /**
     * @return char the close character of this range
     */
    public char getEnd() {
        return close;
    }

    /**
     * @param ch char new start character of this range
     */
    public void setStart(char ch) {
        start = ch;
    }

    /**
     * @param ch char new close character of this range
     */
    public void setEnd(char ch) {
        close = ch;
    }

    /**
     * Is this CharRange over many characters, i.e. has a close
     * character been set.
     *
     * @return boolean true if a close character is set
     */
    public boolean isRange() {
        return UNSET != close;
    }

    /**
     * Is the passed in character inside this range. The negated
     * marker is not consulted here.
     *
     * @param ch char to test
     * @return boolean true if in range
     */
    public boolean inRange(char ch) {
        if (!isRange()) {
            // single character: only an exact match counts
            return ch == start;
        }
        return (start <= ch) && (ch <= close);
    }

    /**
     * Is this CharRange negated.
     *
     * @return boolean true if negated
     */
    public boolean isNegated() {
        return negated;
    }

    /**
     * Mark this character range as negated (or not).
     * Negated implies that this CharRange is over all characters
     * except the ones in this range.
     *
     * @param b boolean new value of the negated marker
     */
    public void setNegated(boolean b) {
        negated = b;
    }

    /**
     * Render this range in set-syntax: an optional leading "^" when
     * negated, the start character, and "-" plus the close character
     * when this is a range.
     *
     * @return String set-syntax form of this range
     */
    public String toString() {
        StringBuffer text = new StringBuffer();
        if (negated) {
            text.append('^');
        }
        text.append(start);
        if (isRange()) {
            text.append('-').append(close);
        }
        return text.toString();
    }
}
1.1 jakarta-commons-sandbox/lang/src/java/org/apache/commons/lang/CharSet.java
Index: CharSet.java
===================================================================
package org.apache.commons.lang;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowlegement:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
* 4. The names "The Jakarta Project", "Commons", and "Apache Software
* Foundation" must not be used to endorse or promote products derived
* from this software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* nor may "Apache" appear in their names without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.util.Iterator;
import java.util.List;
import java.util.LinkedList;
/**
 * A set of characters, described in a small set-syntax and held as a
 * list of CharRange entries. Also provided are static helpers that
 * squeeze, count and delete the characters of a String which are
 * members of such a set.
 *
 * @author bayard@generationjava.com
 * @version $Id: CharSet.java,v 1.1 2002/07/05 06:33:05 bayard Exp $
 */
public class CharSet {

    // used to be a com.generationjava.collections.typed.TypedList
    /** The CharRange entries that make up this set. */
    private List set = new LinkedList();

    /**
     * Squeezes any repetitions of a character that is mentioned in the
     * supplied set. An example is:
     *    squeeze("hello", {"el"})  =>  "helo"
     * See evaluateSet for set-syntax.
     *
     * @param str String target to squeeze
     * @param set String[] set of characters to squeeze
     * @return String the squeezed string
     */
    public static String squeeze(String str, String[] set) {
        CharSet chars = evaluateSet(set);
        StringBuffer buffer = new StringBuffer(str.length());
        char[] chrs = str.toCharArray();
        int sz = chrs.length;
        char lastChar = ' ';
        for (int i = 0; i < sz; i++) {
            char ch = chrs[i];
            // i != 0 guards against the initial value of lastChar being
            // mistaken for a real previous character.
            if ((i != 0) && (ch == lastChar) && chars.contains(ch)) {
                continue;
            }
            buffer.append(ch);
            lastChar = ch;
        }
        return buffer.toString();
    }

    /**
     * Squeezes repetitions using a single set-syntax string.
     *
     * @param str String target to squeeze
     * @param set String set of characters to squeeze
     * @return String the squeezed string
     */
    public static String squeeze(String str, String set) {
        return squeeze(str, new String[] { set });
    }

    /**
     * Creates a CharSet object which allows a certain amount of
     * set logic to be performed upon the following syntax:
     *
     * "aeio" which implies 'a','e',..
     * "^e" implies not e. However it only negates, it's not
     * a set in itself due to the size of that set in unicode.
     * "ej-m" implies e,j->m. e,j,k,l,m.
     *
     * @param set String[] set-syntax strings
     * @return CharSet built from all of the strings
     */
    public static CharSet evaluateSet(String[] set) {
        return new CharSet(set);
    }

    /**
     * Counts characters using a single set-syntax string.
     *
     * @param str String target to count characters in
     * @param set String set of characters to count
     * @return int number of characters of str that are in the set
     */
    public static int count(String str, String set) {
        return count(str, new String[] { set });
    }

    /**
     * Takes an argument in set-syntax, see evaluateSet,
     * and returns the number of characters present in the specified string.
     * An example would be:   count("hello", {"c-f","o"}) returns 2.
     *
     * @param str String target to count characters in
     * @param set String[] set of characters to count
     * @return int number of characters of str that are in the set
     */
    public static int count(String str, String[] set) {
        CharSet chars = evaluateSet(set);
        int count = 0;
        char[] chrs = str.toCharArray();
        int sz = chrs.length;
        for (int i = 0; i < sz; i++) {
            if (chars.contains(chrs[i])) {
                count++;
            }
        }
        return count;
    }

    /**
     * Deletes characters using a single set-syntax string.
     *
     * @param str String target to delete characters from
     * @param set String set of characters to delete
     * @return String str without the characters that are in the set
     */
    public static String delete(String str, String set) {
        return delete(str, new String[] { set });
    }

    /**
     * Takes an argument in set-syntax, see evaluateSet,
     * and deletes any of characters present in the specified string.
     * An example would be:   delete("hello", {"c-f","o"}) returns "hll"
     *
     * @param str String target to delete characters from
     * @param set String[] set of characters to delete
     * @return String str without the characters that are in the set
     */
    public static String delete(String str, String[] set) {
        CharSet chars = evaluateSet(set);
        StringBuffer buffer = new StringBuffer(str.length());
        char[] chrs = str.toCharArray();
        int sz = chrs.length;
        for (int i = 0; i < sz; i++) {
            if (!chars.contains(chrs[i])) {
                buffer.append(chrs[i]);
            }
        }
        return buffer.toString();
    }

    /**
     * Builds a set from set-syntax strings; each string is passed to
     * add in order.
     *
     * @param set String[] set-syntax strings
     */
    public CharSet(String[] set) {
        int sz = set.length;
        for (int i = 0; i < sz; i++) {
            add(set[i]);
        }
    }

    /**
     * Is the character a member of this set. The set is the union of its
     * entries: a plain entry matches the characters inside it, a negated
     * entry matches every character outside it.
     *
     * @param ch char to look for
     * @return boolean true if any entry matches ch
     */
    public boolean contains(char ch) {
        Iterator iterator = set.iterator();
        while (iterator.hasNext()) {
            CharRange range = (CharRange) iterator.next();
            // plain entry: match when in range; negated: match when not.
            if (range.isNegated() != range.inRange(ch)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Adds the characters described by a set-syntax string to this set.
     * <ul>
     * <li>A plain character adds that character.</li>
     * <li>'-' after a character turns that character into a range that
     *     closes on the character following the '-'.</li>
     * <li>'-' with no character before it in the string (including the
     *     string "-" itself) is added as a literal hyphen.</li>
     * <li>A '-' that is not followed by any character is ignored.</li>
     * <li>'^' marks every character created after it in the same string
     *     as negated.</li>
     * </ul>
     *
     * @param str String in set-syntax
     */
    public void add(String str) {
        int sz = str.length();
        CharRange range = null;   // entry most recently created from str
        boolean end = false;      // true when the next char closes a range
        boolean negated = false;
        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);
            if ((ch == '-') && (range != null)) {
                end = true;
                continue;
            }
            if (end) {
                range.setEnd(ch);
                // the range is complete; later characters stand alone
                end = false;
                continue;
            }
            if (ch == '^') {
                negated = true;
                continue;
            }
            // Reached for ordinary characters and for a '-' that has no
            // character before it, which is taken literally.
            range = new CharRange(ch);
            range.setNegated(negated);
            set.add(range);
        }
    }

    /**
     * @return String the list of entries, each in set-syntax
     */
    public String toString() {
        return set.toString();
    }
}
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>