You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/09/25 20:14:32 UTC

svn commit: r449765 - /lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java

Author: ab
Date: Mon Sep 25 11:14:31 2006
New Revision: 449765

URL: http://svn.apache.org/viewvc?view=rev&rev=449765
Log:
Catch MalformedURLException on invalid URLs, and continue collecting the valid ones.

Modified:
    lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java

Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java?view=diff&rev=449765&r1=449764&r2=449765
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java Mon Sep 25 11:14:31 2006
@@ -16,6 +16,7 @@
 
 package org.apache.nutch.parse;
 
+import java.net.MalformedURLException;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -108,7 +109,13 @@
         }
         result = matcher.getMatch();
         url = result.group(0);
-        outlinks.add(new Outlink(url, anchor, conf));
+        url = result.group(0);
+        try {
+          Outlink outlink = new Outlink(url, anchor, conf);
+          outlinks.add(new Outlink(url, anchor, conf));
+        } catch (MalformedURLException mue) {
+          LOG.warn("Invalid url: '" + url + "', skipping.");
+        }
       }
     } catch (Exception ex) {
       // if the matcher fails (perhaps a malformed URL) we just log it and move on