You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/11/28 23:41:07 UTC

svn commit: r885159 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/parse/ParseOutputFormat.java

Author: ab
Date: Sat Nov 28 22:41:06 2009
New Revision: 885159

URL: http://svn.apache.org/viewvc?rev=885159&view=rev
Log:
NUTCH-712 ParseOutputFormat should catch java.net.MalformedURLException
coming from normalizers.

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=885159&r1=885158&r2=885159&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Sat Nov 28 22:41:06 2009
@@ -2,6 +2,9 @@
 
 Unreleased Changes
 
+* NUTCH-712 ParseOutputFormat should catch java.net.MalformedURLException
+  coming from normalizers (Julien Nioche via ab)
+
 * NUTCH-741 Job file includes multiple copies of nutch config files
   (Kirby Bohling via ab)
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?rev=885159&r1=885158&r2=885159&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Sat Nov 28 22:41:06 2009
@@ -140,9 +140,13 @@
                 pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
               String newUrl = pstatus.getMessage();
               int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
-              newUrl = normalizers.normalize(newUrl,
-                                             URLNormalizers.SCOPE_FETCHER);
-              newUrl = filters.filter(newUrl);
+              try {
+                newUrl = normalizers.normalize(newUrl,
+                    URLNormalizers.SCOPE_FETCHER);
+              } catch (MalformedURLException mfue) {
+                newUrl = null;
+              }
+              if (newUrl != null) newUrl = filters.filter(newUrl);
               String url = key.toString();
               if (newUrl != null && !newUrl.equals(url)) {
                 String reprUrl =