You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by sa...@apache.org on 2003/01/13 18:12:54 UTC
cvs commit: xml-xerces/java/src/org/apache/xerces/util URI.java
sandygao 2003/01/13 09:12:54
Modified: java/src/org/apache/xerces/util URI.java
Log:
Committing the patch proposed by Michael Glavassevich to solve some
performance problems in the URI class. Thanks, Michael.
"
1) I created a lookup table for the character classes of URIs, to
eliminate the linear character searches through strings.
2) I modified the character-checking methods to use the lookup table, and
combined checks, e.g. (isReservedCharacter || isUnreservedCharacter ->
isURICharacter).
3) I modified the signatures of initializeAuthority and initializePath to
take string indexes, eliminating the need to create substrings when
calling these methods.
"
Revision Changes Path
1.7 +162 -52 xml-xerces/java/src/org/apache/xerces/util/URI.java
Index: URI.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/URI.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- URI.java 10 May 2002 16:30:10 -0000 1.6
+++ URI.java 13 Jan 2003 17:12:54 -0000 1.7
@@ -120,21 +120,108 @@
}
}
- /** reserved characters */
+ private static final byte [] fgLookupTable = new byte[128];
+
+ /**
+ * Character Classes
+ */
+
+ /** reserved characters ;/?:@&=+$,[] */
//RFC 2732 added '[' and ']' as reserved characters
- //private static final String RESERVED_CHARACTERS = ";/?:@&=+$,";
- private static final String RESERVED_CHARACTERS = ";/?:@&=+$,[]";
-
- /** URI punctuation mark characters - these, combined with
+ private static final int RESERVED_CHARACTERS = 0x01;
+
+ /** URI punctuation mark characters: -_.!~*'() - these, combined with
alphanumerics, constitute the "unreserved" characters */
- private static final String MARK_CHARACTERS = "-_.!~*'()";
-
- /** scheme can be composed of alphanumerics and these characters */
- private static final String SCHEME_CHARACTERS = "+-.";
-
+ private static final int MARK_CHARACTERS = 0x02;
+
+ /** scheme can be composed of alphanumerics and these characters: +-. */
+ private static final int SCHEME_CHARACTERS = 0x04;
+
/** userinfo can be composed of unreserved, escaped and these
- characters */
- private static final String USERINFO_CHARACTERS = ";:&=+$,";
+ characters: ;:&=+$, */
+ private static final int USERINFO_CHARACTERS = 0x08;
+
+ /** ASCII letter characters */
+ private static final int ASCII_ALPHA_CHARACTERS = 0x10;
+
+ /** ASCII digit characters */
+ private static final int ASCII_DIGIT_CHARACTERS = 0x20;
+
+ /** ASCII hex characters */
+ private static final int ASCII_HEX_CHARACTERS = 0x40;
+
+ /** Mask for alpha-numeric characters */
+ private static final int MASK_ALPHA_NUMERIC = ASCII_ALPHA_CHARACTERS | ASCII_DIGIT_CHARACTERS;
+
+ /** Mask for unreserved characters */
+ private static final int MASK_UNRESERVED_MASK = MASK_ALPHA_NUMERIC | MARK_CHARACTERS;
+
+ /** Mask for URI allowable characters except for % */
+ private static final int MASK_URI_CHARACTER = MASK_UNRESERVED_MASK | RESERVED_CHARACTERS;
+
+ /** Mask for scheme characters */
+ private static final int MASK_SCHEME_CHARACTER = MASK_ALPHA_NUMERIC | SCHEME_CHARACTERS;
+
+ /** Mask for userinfo characters */
+ private static final int MASK_USERINFO_CHARACTER = MASK_UNRESERVED_MASK | USERINFO_CHARACTERS;
+
+ static {
+ // Add ASCII Digits and ASCII Hex Numbers
+ for (int i = '0'; i <= '9'; ++i) {
+ fgLookupTable[i] |= ASCII_DIGIT_CHARACTERS | ASCII_HEX_CHARACTERS;
+ }
+
+ // Add ASCII Letters and ASCII Hex Numbers
+ for (int i = 'A'; i <= 'F'; ++i) {
+ fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS;
+ fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS;
+ }
+
+ // Add ASCII Letters
+ for (int i = 'G'; i <= 'Z'; ++i) {
+ fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS;
+ fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS;
+ }
+
+ // Add Reserved Characters
+ fgLookupTable[';'] |= RESERVED_CHARACTERS;
+ fgLookupTable['/'] |= RESERVED_CHARACTERS;
+ fgLookupTable['?'] |= RESERVED_CHARACTERS;
+ fgLookupTable[':'] |= RESERVED_CHARACTERS;
+ fgLookupTable['@'] |= RESERVED_CHARACTERS;
+ fgLookupTable['&'] |= RESERVED_CHARACTERS;
+ fgLookupTable['='] |= RESERVED_CHARACTERS;
+ fgLookupTable['+'] |= RESERVED_CHARACTERS;
+ fgLookupTable['$'] |= RESERVED_CHARACTERS;
+ fgLookupTable[','] |= RESERVED_CHARACTERS;
+ fgLookupTable['['] |= RESERVED_CHARACTERS;
+ fgLookupTable[']'] |= RESERVED_CHARACTERS;
+
+ // Add Mark Characters
+ fgLookupTable['-'] |= MARK_CHARACTERS;
+ fgLookupTable['_'] |= MARK_CHARACTERS;
+ fgLookupTable['.'] |= MARK_CHARACTERS;
+ fgLookupTable['!'] |= MARK_CHARACTERS;
+ fgLookupTable['~'] |= MARK_CHARACTERS;
+ fgLookupTable['*'] |= MARK_CHARACTERS;
+ fgLookupTable['\''] |= MARK_CHARACTERS;
+ fgLookupTable['('] |= MARK_CHARACTERS;
+ fgLookupTable[')'] |= MARK_CHARACTERS;
+
+ // Add Scheme Characters
+ fgLookupTable['+'] |= SCHEME_CHARACTERS;
+ fgLookupTable['-'] |= SCHEME_CHARACTERS;
+ fgLookupTable['.'] |= SCHEME_CHARACTERS;
+
+ // Add Userinfo Characters
+ fgLookupTable[';'] |= USERINFO_CHARACTERS;
+ fgLookupTable[':'] |= USERINFO_CHARACTERS;
+ fgLookupTable['&'] |= USERINFO_CHARACTERS;
+ fgLookupTable['='] |= USERINFO_CHARACTERS;
+ fgLookupTable['+'] |= USERINFO_CHARACTERS;
+ fgLookupTable['$'] |= USERINFO_CHARACTERS;
+ fgLookupTable[','] |= USERINFO_CHARACTERS;
+ }
/** Stores the scheme (usually the protocol) for this URI. */
private String m_scheme = null;
@@ -363,20 +450,21 @@
*/
private void initialize(URI p_base, String p_uriSpec)
throws MalformedURIException {
- if (p_base == null &&
- (p_uriSpec == null || p_uriSpec.trim().length() == 0)) {
+
+ String uriSpec = (p_uriSpec != null) ? p_uriSpec.trim() : null;
+ int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0;
+
+ if (p_base == null && uriSpecLen == 0) {
throw new MalformedURIException(
"Cannot initialize URI with empty parameters.");
- }
+ }
// just make a copy of the base if spec is empty
- if (p_uriSpec == null || p_uriSpec.trim().length() == 0) {
+ if (uriSpecLen == 0) {
initialize(p_base);
return;
}
- String uriSpec = p_uriSpec.trim();
- int uriSpecLen = uriSpec.length();
int index = 0;
// Check for scheme, which must be before '/', '?' or '#'. Also handle
@@ -403,7 +491,7 @@
// two slashes means generic URI syntax, so we get the authority
if (((index+1) < uriSpecLen) &&
- (uriSpec.substring(index).startsWith("//"))) {
+ (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) {
index += 2;
int startPos = index;
@@ -420,14 +508,14 @@
// if we found authority, parse it out, otherwise we set the
// host to empty string
if (index > startPos) {
- initializeAuthority(uriSpec.substring(startPos, index));
+ initializeAuthority(uriSpec, startPos, index);
}
else {
m_host = "";
}
}
- initializePath(uriSpec.substring(index));
+ initializePath(uriSpec, index);
// Resolve relative URI to base URI - see RFC 2396 Section 5.2
// In some cases, it might make more sense to throw an exception
@@ -582,14 +670,17 @@
* URI from a URI string spec.
*
* @param p_uriSpec the URI specification (cannot be null)
+ * @param p_nStartIndex the index to begin scanning from
+ * @param p_nEndIndex the index to end scanning at
*
* @exception MalformedURIException if p_uriSpec violates syntax rules
*/
- private void initializeAuthority(String p_uriSpec)
+ private void initializeAuthority(String p_uriSpec, int p_nStartIndex, int p_nEndIndex)
throws MalformedURIException {
- int index = 0;
- int start = 0;
- int end = p_uriSpec.length();
+
+ int index = p_nStartIndex;
+ int start = p_nStartIndex;
+ int end = p_nEndIndex;
char testChar = '\0';
String userinfo = null;
@@ -653,18 +744,19 @@
* Initialize the path for this URI from a URI string spec.
*
* @param p_uriSpec the URI specification (cannot be null)
+ * @param p_nStartIndex the index to begin scanning from
*
* @exception MalformedURIException if p_uriSpec violates syntax rules
*/
- private void initializePath(String p_uriSpec)
+ private void initializePath(String p_uriSpec, int p_nStartIndex)
throws MalformedURIException {
if (p_uriSpec == null) {
throw new MalformedURIException(
"Cannot initialize path from null string!");
}
- int index = 0;
- int start = 0;
+ int index = p_nStartIndex;
+ int start = p_nStartIndex;
int end = p_uriSpec.length();
char testChar = '\0';
@@ -683,8 +775,7 @@
"Path contains invalid escape sequence!");
}
}
- else if (!isReservedCharacter(testChar) &&
- !isUnreservedCharacter(testChar)) {
+ else if (!isURICharacter(testChar)) {
throw new MalformedURIException(
"Path contains invalid character: " + testChar);
}
@@ -709,8 +800,7 @@
"Query string contains invalid escape sequence!");
}
}
- else if (!isReservedCharacter(testChar) &&
- !isUnreservedCharacter(testChar)) {
+ else if (!isURICharacter(testChar)) {
throw new MalformedURIException(
"Query string contains invalid character:" + testChar);
}
@@ -734,8 +824,7 @@
"Fragment contains invalid escape sequence!");
}
}
- else if (!isReservedCharacter(testChar) &&
- !isUnreservedCharacter(testChar)) {
+ else if (!isURICharacter(testChar)) {
throw new MalformedURIException(
"Fragment contains invalid character:"+testChar);
}
@@ -942,8 +1031,7 @@
"Userinfo contains invalid escape sequence!");
}
}
- else if (!isUnreservedCharacter(testChar) &&
- USERINFO_CHARACTERS.indexOf(testChar) == -1) {
+ else if (!isUserinfoCharacter(testChar)) {
throw new MalformedURIException(
"Userinfo contains invalid character:"+testChar);
}
@@ -1019,7 +1107,7 @@
m_fragment = null;
}
else {
- initializePath(p_path);
+ initializePath(p_path, 0);
}
}
@@ -1221,8 +1309,7 @@
char testChar;
for (int i = 1; i < p_scheme.length(); i++) {
testChar = p_scheme.charAt(i);
- if (!isAlphanum(testChar) &&
- SCHEME_CHARACTERS.indexOf(testChar) == -1) {
+ if (!isSchemeCharacter(testChar)) {
return false;
}
}
@@ -1328,9 +1415,7 @@
* or 'A' and 'F', false otherwise
*/
private static boolean isHex(char p_char) {
- return (isDigit(p_char) ||
- (p_char >= 'a' && p_char <= 'f') ||
- (p_char >= 'A' && p_char <= 'F'));
+ return (p_char <= 'f' && (fgLookupTable[p_char] & ASCII_HEX_CHARACTERS) != 0);
}
/**
@@ -1339,8 +1424,7 @@
* @return true if the char is alphabetic, false otherwise
*/
private static boolean isAlpha(char p_char) {
- return ((p_char >= 'a' && p_char <= 'z') ||
- (p_char >= 'A' && p_char <= 'Z' ));
+ return ((p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z' ));
}
/**
@@ -1349,17 +1433,17 @@
* @return true if the char is alphanumeric, false otherwise
*/
private static boolean isAlphanum(char p_char) {
- return (isAlpha(p_char) || isDigit(p_char));
+ return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_ALPHA_NUMERIC) != 0);
}
/**
* Determine whether a character is a reserved character:
- * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
+ * ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '[', or ']'
*
* @return true if the string contains any reserved characters
*/
private static boolean isReservedCharacter(char p_char) {
- return RESERVED_CHARACTERS.indexOf(p_char) != -1;
+ return (p_char <= ']' && (fgLookupTable[p_char] & RESERVED_CHARACTERS) != 0);
}
/**
@@ -1368,8 +1452,35 @@
* @return true if the char is unreserved, false otherwise
*/
private static boolean isUnreservedCharacter(char p_char) {
- return (isAlphanum(p_char) ||
- MARK_CHARACTERS.indexOf(p_char) != -1);
+ return (p_char <= '~' && (fgLookupTable[p_char] & MASK_UNRESERVED_MASK) != 0);
+ }
+
+ /**
+ * Determine whether a char is a URI character (reserved or
+ * unreserved, not including '%' for escaped octets).
+ *
+ * @return true if the char is a URI character, false otherwise
+ */
+ private static boolean isURICharacter (char p_char) {
+ return (p_char <= '~' && (fgLookupTable[p_char] & MASK_URI_CHARACTER) != 0);
+ }
+
+ /**
+ * Determine whether a char is a scheme character.
+ *
+ * @return true if the char is a scheme character, false otherwise
+ */
+ private static boolean isSchemeCharacter (char p_char) {
+ return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_SCHEME_CHARACTER) != 0);
+ }
+
+ /**
+ * Determine whether a char is a userinfo character: unreserved (alphanumeric
+ * or mark) or one of ;:&=+$, - bound is '~', the highest mark, not 'z'.
+ * @return true if the char is a userinfo character, false otherwise
+ */
+ private static boolean isUserinfoCharacter (char p_char) {
+ return (p_char <= '~' && (fgLookupTable[p_char] & MASK_USERINFO_CHARACTER) != 0);
}
/**
@@ -1398,8 +1509,7 @@
continue;
}
}
- if (isReservedCharacter(testChar) ||
- isUnreservedCharacter(testChar)) {
+ if (isURICharacter(testChar)) {
continue;
}
else {
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org