You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ku...@apache.org on 2008/12/04 20:09:15 UTC

svn commit: r723398 - /lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java

Author: kubes
Date: Thu Dec  4 11:09:15 2008
New Revision: 723398

URL: http://svn.apache.org/viewvc?rev=723398&view=rev
Log:
NUTCH-647: Resolve URLs tool.  New methods for URLUtil should have been included but weren't.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java?rev=723398&r1=723397&r2=723398&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java Thu Dec  4 11:09:15 2008
@@ -194,6 +194,42 @@
     return temp ? src : dst;
   }
 
+  /**
+   * Extracts the host name from the given url and converts it to lower case.
+   * A url that cannot be parsed yields null instead of an exception.
+   * 
+   * @param url The url whose host name is wanted.
+   * @return String The lowercased host name, or null for a malformed url.
+   */
+  public static String getHost(String url) {
+    String host = null;
+    try {
+      host = new URL(url).getHost().toLowerCase();
+    }
+    catch (MalformedURLException e) { /* malformed url: host stays null */ }
+    return host;
+  }
+
+  /**
+   * Returns the page for the url.  The page consists of the protocol, host,
+   * port (if one is given) and path; the query string and fragment are dropped.
+   * The host is lowercased but the path keeps its original case.
+   * 
+   * @param url The url to check.
+   * @return String The page for the url, or null if it is not well formed.
+   */
+  public static String getPage(String url) {
+    try {
+      // rebuild from the parsed parts so that only the host changes case
+      URL parsed = new URL(url);
+      return new URL(parsed.getProtocol(), parsed.getHost().toLowerCase(),
+        parsed.getPort(), parsed.getPath()).toString();
+    }
+    catch (MalformedURLException e) {
+      return null;
+    }
+  }
+  
   /** For testing */
   public static void main(String[] args){