You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ni...@apache.org on 2011/05/28 00:10:48 UTC

svn commit: r1128483 - /commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/UrlValidator.java

Author: nick
Date: Fri May 27 22:10:47 2011
New Revision: 1128483

URL: http://svn.apache.org/viewvc?rev=1128483&view=rev
Log:
VALIDATOR-191 Remove ORO dependency from the old URL Validator

Modified:
    commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/UrlValidator.java

Modified: commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/UrlValidator.java
URL: http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/UrlValidator.java?rev=1128483&r1=1128482&r2=1128483&view=diff
==============================================================================
--- commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/UrlValidator.java (original)
+++ commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/UrlValidator.java Fri May 27 22:10:47 2011
@@ -20,10 +20,11 @@ import java.io.Serializable;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.commons.validator.routines.InetAddressValidator;
 import org.apache.commons.validator.util.Flags;
-import org.apache.oro.text.perl.Perl5Util;
 
 /**
  * <p>Validates URLs.</p>
@@ -99,19 +100,18 @@ public class UrlValidator implements Ser
 
     private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
 
-    private static final String SCHEME_CHARS = ALPHA_CHARS;
-
     // Drop numeric, and  "+-." for now
-    private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\.";
+    private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\.";
 
     private static final String ATOM = VALID_CHARS + '+';
 
     /**
      * This expression derived/taken from the BNF for URI (RFC2396).
      */
-    private static final String URL_PATTERN =
-            "/^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?/";
+    private static final String URL_REGEX =
+            "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
     //                                                                      12            3  4          5       6   7        8 9
+    private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
 
     /**
      * Scheme/Protocol (e.g. http:, ftp:, file:, etc).
@@ -132,11 +132,12 @@ public class UrlValidator implements Ser
     /**
      * Protocol (e.g. http:, ftp:, https:).
      */
-    private static final String SCHEME_PATTERN = "/^[" + SCHEME_CHARS + "]/";
+    private static final Pattern SCHEME_PATTERN = Pattern.compile("^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*");
 
-    private static final String AUTHORITY_PATTERN =
-            "/^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?/";
+    private static final String AUTHORITY_REGEX =
+       "^([" + AUTHORITY_CHARS_REGEX + "]*)(:\\d*)?(.*)?";
     // Capture groups: 1 = host name or IP, 2 = port (including the leading ':'), 3 = anything left over
+    private static final Pattern AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEX);
 
     private static final int PARSE_AUTHORITY_HOST_IP = 1;
 
@@ -147,20 +148,20 @@ public class UrlValidator implements Ser
      */
     private static final int PARSE_AUTHORITY_EXTRA = 3;
 
-    private static final String PATH_PATTERN = "/^(/[-\\w:@&?=+,.!/~*'%$_;]*)?$/";
+    private static final Pattern PATH_PATTERN = Pattern.compile("^(/[-\\w:@&?=+,.!/~*'%$_;]*)?$");
 
-    private static final String QUERY_PATTERN = "/^(.*)$/";
+    private static final Pattern QUERY_PATTERN = Pattern.compile("^(.*)$");
 
-    private static final String LEGAL_ASCII_PATTERN = "/^[\\000-\\177]+$/";
+    private static final Pattern LEGAL_ASCII_PATTERN = Pattern.compile("^\\p{ASCII}+$");
 
-    private static final String DOMAIN_PATTERN =
-            "/^" + ATOM + "(\\." + ATOM + ")*$/";
+    private static final Pattern DOMAIN_PATTERN =
+            Pattern.compile("^" + ATOM + "(\\." + ATOM + ")*$");
 
-    private static final String PORT_PATTERN = "/^:(\\d{1,5})$/";
+    private static final Pattern PORT_PATTERN = Pattern.compile("^:(\\d{1,5})$");
 
-    private static final String ATOM_PATTERN = "/(" + ATOM + ")/";
+    private static final Pattern ATOM_PATTERN = Pattern.compile("^(" + ATOM + ").*?$");
 
-    private static final String ALPHA_PATTERN = "/^[" + ALPHA_CHARS + "]/";
+    private static final Pattern ALPHA_PATTERN = Pattern.compile("^[" + ALPHA_CHARS + "]");
 
     /**
      * Holds the set of current validation options.
@@ -238,36 +239,33 @@ public class UrlValidator implements Ser
         if (value == null) {
             return false;
         }
-
-        Perl5Util matchUrlPat = new Perl5Util();
-        Perl5Util matchAsciiPat = new Perl5Util();
-
-        if (!matchAsciiPat.match(LEGAL_ASCII_PATTERN, value)) {
-            return false;
+        if (!LEGAL_ASCII_PATTERN.matcher(value).matches()) {
+           return false;
         }
 
         // Check the whole url address structure
-        if (!matchUrlPat.match(URL_PATTERN, value)) {
+        Matcher urlMatcher = URL_PATTERN.matcher(value);
+        if (!urlMatcher.matches()) {
             return false;
         }
 
-        if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) {
+        if (!isValidScheme(urlMatcher.group(PARSE_URL_SCHEME))) {
             return false;
         }
 
-        if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) {
+        if (!isValidAuthority(urlMatcher.group(PARSE_URL_AUTHORITY))) {
             return false;
         }
 
-        if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) {
+        if (!isValidPath(urlMatcher.group(PARSE_URL_PATH))) {
             return false;
         }
 
-        if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) {
+        if (!isValidQuery(urlMatcher.group(PARSE_URL_QUERY))) {
             return false;
         }
 
-        if (!isValidFragment(matchUrlPat.group(PARSE_URL_FRAGMENT))) {
+        if (!isValidFragment(urlMatcher.group(PARSE_URL_FRAGMENT))) {
             return false;
         }
 
@@ -287,8 +285,7 @@ public class UrlValidator implements Ser
             return false;
         }
 
-        Perl5Util schemeMatcher = new Perl5Util();
-        if (!schemeMatcher.match(SCHEME_PATTERN, scheme)) {
+        if (!SCHEME_PATTERN.matcher(scheme).matches()) {
             return false;
         }
 
@@ -313,11 +310,11 @@ public class UrlValidator implements Ser
             return false;
         }
 
-        Perl5Util authorityMatcher = new Perl5Util();
         InetAddressValidator inetAddressValidator =
                 InetAddressValidator.getInstance();
 
-        if (!authorityMatcher.match(AUTHORITY_PATTERN, authority)) {
+        Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority); 
+        if (!authorityMatcher.matches()) {
             return false;
         }
 
@@ -328,8 +325,7 @@ public class UrlValidator implements Ser
 
         if (!ipV4Address) {
             // Not an IPv4 address, so the host must be a domain name
-            Perl5Util domainMatcher = new Perl5Util();
-            hostname = domainMatcher.match(DOMAIN_PATTERN, hostIP);
+            hostname = DOMAIN_PATTERN.matcher(hostIP).matches();
         }
 
         //rightmost hostname will never start with a digit.
@@ -347,10 +343,10 @@ public class UrlValidator implements Ser
             boolean match = true;
             int segmentCount = 0;
             int segmentLength = 0;
-            Perl5Util atomMatcher = new Perl5Util();
 
             while (match) {
-                match = atomMatcher.match(ATOM_PATTERN, hostIP);
+                Matcher atomMatcher = ATOM_PATTERN.matcher(hostIP);
+                match = atomMatcher.matches();
                 if (match) {
                     domainSegment[segmentCount] = atomMatcher.group(1);
                     segmentLength = domainSegment[segmentCount].length() + 1;
@@ -368,8 +364,7 @@ public class UrlValidator implements Ser
             }
 
             // First letter of top level must be an alpha
-            Perl5Util alphaMatcher = new Perl5Util();
-            if (!alphaMatcher.match(ALPHA_PATTERN, topLevel.substring(0, 1))) {
+            if (!ALPHA_PATTERN.matcher(topLevel.substring(0, 1)).matches()) {
                 return false;
             }
 
@@ -385,8 +380,7 @@ public class UrlValidator implements Ser
 
         String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
         if (port != null) {
-            Perl5Util portMatcher = new Perl5Util();
-            if (!portMatcher.match(PORT_PATTERN, port)) {
+            if (!PORT_PATTERN.matcher(port).matches()) {
                 return false;
             }
         }
@@ -409,9 +403,7 @@ public class UrlValidator implements Ser
             return false;
         }
 
-        Perl5Util pathMatcher = new Perl5Util();
-
-        if (!pathMatcher.match(PATH_PATTERN, path)) {
+        if (!PATH_PATTERN.matcher(path).matches()) {
             return false;
         }
 
@@ -441,8 +433,7 @@ public class UrlValidator implements Ser
             return true;
         }
 
-        Perl5Util queryMatcher = new Perl5Util();
-        return queryMatcher.match(QUERY_PATTERN, query);
+        return QUERY_PATTERN.matcher(query).matches();
     }
 
     /**