You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by si...@apache.org on 2007/01/06 10:39:21 UTC

svn commit: r493438 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/net/URLFilters.java src/test/org/apache/nutch/net/TestURLFilters.java

Author: siren
Date: Sat Jan  6 01:39:20 2007
New Revision: 493438

URL: http://svn.apache.org/viewvc?view=rev&rev=493438
Log:
Fix NUTCH-325

Added:
    lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java
Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=493438&r1=493437&r2=493438
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Sat Jan  6 01:39:20 2007
@@ -117,6 +117,9 @@
 37. NUTCH-425, NUTCH-426 - Fix anchors pollution. Continue after
     skipping bad URLs. (Michael Stack via ab)
 
+38. NUTCH-325 - URLFilters.java throws NPE in case urlfilter.order contains
+    Filters that are not in plugin.includes (Stefan Groschupf, siren)
+
 
 Release 0.8 - 2006-07-25
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java?view=diff&rev=493438&r1=493437&r2=493438
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java Sat Jan  6 01:39:20 2007
@@ -17,6 +17,7 @@
 
 package org.apache.nutch.net;
 
+import java.util.ArrayList;
 import java.util.HashMap;
 
 import org.apache.nutch.plugin.Extension;
@@ -28,10 +29,11 @@
 /** Creates and caches {@link URLFilter} implementing plugins.*/
 public class URLFilters {
 
+  public static final String URLFILTER_ORDER = "urlfilter.order";
   private URLFilter[] filters;
 
   public URLFilters(Configuration conf) {
-      String order = conf.get("urlfilter.order");
+      String order = conf.get(URLFILTER_ORDER);
       this.filters = (URLFilter[]) conf.getObject(URLFilter.class.getName());
       
       if (this.filters == null) {
@@ -60,12 +62,16 @@
                     conf.setObject(URLFilter.class.getName(), filterMap
                             .values().toArray(new URLFilter[0]));
                 } else {
-                    URLFilter[] filter = new URLFilter[orderedFilters.length];
+                    ArrayList filters = new ArrayList();
                     for (int i = 0; i < orderedFilters.length; i++) {
-                        filter[i] = (URLFilter) filterMap
+                      URLFilter filter = (URLFilter) filterMap
                                 .get(orderedFilters[i]);
+                      if(filter != null){
+                        filters.add(filter);
+                      }
                     }
-                    conf.setObject(URLFilter.class.getName(), filter);
+                    conf.setObject(URLFilter.class.getName(), 
+                        filters.toArray(new URLFilter[filters.size()]));
                 }
             } catch (PluginRuntimeException e) {
                 throw new RuntimeException(e);

Added: lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java?view=auto&rev=493438
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java (added)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java Sat Jan  6 01:39:20 2007
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+public class TestURLFilters extends TestCase {
+
+  /**
+   * Testcase for NUTCH-325: urlfilter.order lists a filter name that does not exist.
+   * @throws URLFilterException
+   */
+  public void testNonExistingUrlFilter() throws URLFilterException {
+    Configuration conf = NutchConfiguration.create();
+    String class1 = "NonExistingFilter"; // deliberately bogus filter name
+    String class2 = "org.apache.nutch.urlfilter.prefix.PrefixURLFilter";
+    conf.set(URLFilters.URLFILTER_ORDER, class1 + " " + class2);
+
+    URLFilters normalizers = new URLFilters(conf); // note: holds URL filters, despite the name
+    normalizers.filter("http://someurl/"); // no assertion: the test passes if nothing is thrown
+  }
+
+}