You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/05 22:49:37 UTC

[53/69] [abbrv] nutch git commit: Moved test sources to maven standard directory

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java b/nutch-plugins/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java
deleted file mode 100644
index b86181e..0000000
--- a/nutch-plugins/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.regex;
-
-// JDK imports
-import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.nutch.net.*;
-// Nutch imports
-import org.apache.nutch.urlfilter.api.RegexURLFilterBaseTest;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * JUnit based test of class <code>RegexURLFilter</code>.
- * 
- * @author J&eacute;r&ocirc;me Charron
- */
-public class TestRegexURLFilter extends RegexURLFilterBaseTest {
-
-  protected URLFilter getURLFilter(Reader rules) {
-    try {
-      return new RegexURLFilter(rules);
-    } catch (IOException e) {
-      Assert.fail(e.toString());
-      return null;
-    }
-  }
-
-  @Test
-  public void test() {
-    test("WholeWebCrawling");
-    test("IntranetCrawling");
-    bench(50, "Benchmarks");
-    bench(100, "Benchmarks");
-    bench(200, "Benchmarks");
-    bench(400, "Benchmarks");
-    bench(800, "Benchmarks");
-  }
-  
-  @Test
-  public void test1838() {
-    test("nutch1838");
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-suffix/src/test/java/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-suffix/src/test/java/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java b/nutch-plugins/urlfilter-suffix/src/test/java/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
new file mode 100644
index 0000000..b09ca2f
--- /dev/null
+++ b/nutch-plugins/urlfilter-suffix/src/test/java/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.suffix;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * JUnit test for <code>SuffixURLFilter</code>.
+ * 
+ * @author Andrzej Bialecki
+ */
+public class TestSuffixURLFilter {
+  private static final String suffixes = "# this is a comment\n" + "\n"
+      + ".gif\n" + ".jpg\n" + ".js\n";
+
+  private static final String[] urls = new String[] {
+      "http://www.example.com/test.gif", "http://www.example.com/TEST.GIF",
+      "http://www.example.com/test.jpg", "http://www.example.com/test.JPG",
+      "http://www.example.com/test.html", "http://www.example.com/test.HTML",
+      "http://www.example.com/test.html?q=abc.js",
+      "http://www.example.com/test.js?foo=bar&baz=bar#12333", };
+
+  private static String[] urlsModeAccept = new String[] { null, urls[1], null,
+      urls[3], urls[4], urls[5], null, urls[7] };
+
+  private static String[] urlsModeReject = new String[] { urls[0], null,
+      urls[2], null, null, null, urls[6], null };
+
+  private static String[] urlsModeAcceptIgnoreCase = new String[] { null, null,
+      null, null, urls[4], urls[5], null, urls[7] };
+
+  private static String[] urlsModeRejectIgnoreCase = new String[] { urls[0],
+      urls[1], urls[2], urls[3], null, null, urls[6], null };
+
+  private static String[] urlsModeAcceptAndPathFilter = new String[] { null,
+      urls[1], null, urls[3], urls[4], urls[5], urls[6], null };
+
+  private static String[] urlsModeAcceptAndNonPathFilter = new String[] { null,
+      urls[1], null, urls[3], urls[4], urls[5], null, urls[7] };
+
+  private SuffixURLFilter filter = null;
+
+  @Before
+  public void setUp() throws IOException {
+    filter = new SuffixURLFilter(new StringReader(suffixes));
+  }
+
+  @Test
+  public void testModeAccept() {
+    filter.setIgnoreCase(false);
+    filter.setModeAccept(true);
+    for (int i = 0; i < urls.length; i++) {
+      Assert.assertTrue(urlsModeAccept[i] == filter.filter(urls[i]));
+    }
+  }
+
+  @Test
+  public void testModeReject() {
+    filter.setIgnoreCase(false);
+    filter.setModeAccept(false);
+    for (int i = 0; i < urls.length; i++) {
+      Assert.assertTrue(urlsModeReject[i] == filter.filter(urls[i]));
+    }
+  }
+
+  @Test
+  public void testModeAcceptIgnoreCase() {
+    filter.setIgnoreCase(true);
+    filter.setModeAccept(true);
+    for (int i = 0; i < urls.length; i++) {
+      Assert.assertTrue(urlsModeAcceptIgnoreCase[i] == filter.filter(urls[i]));
+    }
+  }
+
+  @Test
+  public void testModeRejectIgnoreCase() {
+    filter.setIgnoreCase(true);
+    filter.setModeAccept(false);
+    for (int i = 0; i < urls.length; i++) {
+      Assert.assertTrue(urlsModeRejectIgnoreCase[i] == filter.filter(urls[i]));
+    }
+  }
+
+  @Test
+  public void testModeAcceptAndNonPathFilter() {
+    filter.setModeAccept(true);
+    filter.setFilterFromPath(false);
+    for (int i = 0; i < urls.length; i++) {
+      Assert.assertTrue(urlsModeAcceptAndNonPathFilter[i] == filter
+          .filter(urls[i]));
+    }
+  }
+
+  @Test
+  public void testModeAcceptAndPathFilter() {
+    filter.setModeAccept(true);
+    filter.setFilterFromPath(true);
+    for (int i = 0; i < urls.length; i++) {
+      Assert.assertTrue(urlsModeAcceptAndPathFilter[i] == filter
+          .filter(urls[i]));
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java b/nutch-plugins/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
deleted file mode 100644
index b09ca2f..0000000
--- a/nutch-plugins/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.suffix;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * JUnit test for <code>SuffixURLFilter</code>.
- * 
- * @author Andrzej Bialecki
- */
-public class TestSuffixURLFilter {
-  private static final String suffixes = "# this is a comment\n" + "\n"
-      + ".gif\n" + ".jpg\n" + ".js\n";
-
-  private static final String[] urls = new String[] {
-      "http://www.example.com/test.gif", "http://www.example.com/TEST.GIF",
-      "http://www.example.com/test.jpg", "http://www.example.com/test.JPG",
-      "http://www.example.com/test.html", "http://www.example.com/test.HTML",
-      "http://www.example.com/test.html?q=abc.js",
-      "http://www.example.com/test.js?foo=bar&baz=bar#12333", };
-
-  private static String[] urlsModeAccept = new String[] { null, urls[1], null,
-      urls[3], urls[4], urls[5], null, urls[7] };
-
-  private static String[] urlsModeReject = new String[] { urls[0], null,
-      urls[2], null, null, null, urls[6], null };
-
-  private static String[] urlsModeAcceptIgnoreCase = new String[] { null, null,
-      null, null, urls[4], urls[5], null, urls[7] };
-
-  private static String[] urlsModeRejectIgnoreCase = new String[] { urls[0],
-      urls[1], urls[2], urls[3], null, null, urls[6], null };
-
-  private static String[] urlsModeAcceptAndPathFilter = new String[] { null,
-      urls[1], null, urls[3], urls[4], urls[5], urls[6], null };
-
-  private static String[] urlsModeAcceptAndNonPathFilter = new String[] { null,
-      urls[1], null, urls[3], urls[4], urls[5], null, urls[7] };
-
-  private SuffixURLFilter filter = null;
-
-  @Before
-  public void setUp() throws IOException {
-    filter = new SuffixURLFilter(new StringReader(suffixes));
-  }
-
-  @Test
-  public void testModeAccept() {
-    filter.setIgnoreCase(false);
-    filter.setModeAccept(true);
-    for (int i = 0; i < urls.length; i++) {
-      Assert.assertTrue(urlsModeAccept[i] == filter.filter(urls[i]));
-    }
-  }
-
-  @Test
-  public void testModeReject() {
-    filter.setIgnoreCase(false);
-    filter.setModeAccept(false);
-    for (int i = 0; i < urls.length; i++) {
-      Assert.assertTrue(urlsModeReject[i] == filter.filter(urls[i]));
-    }
-  }
-
-  @Test
-  public void testModeAcceptIgnoreCase() {
-    filter.setIgnoreCase(true);
-    filter.setModeAccept(true);
-    for (int i = 0; i < urls.length; i++) {
-      Assert.assertTrue(urlsModeAcceptIgnoreCase[i] == filter.filter(urls[i]));
-    }
-  }
-
-  @Test
-  public void testModeRejectIgnoreCase() {
-    filter.setIgnoreCase(true);
-    filter.setModeAccept(false);
-    for (int i = 0; i < urls.length; i++) {
-      Assert.assertTrue(urlsModeRejectIgnoreCase[i] == filter.filter(urls[i]));
-    }
-  }
-
-  @Test
-  public void testModeAcceptAndNonPathFilter() {
-    filter.setModeAccept(true);
-    filter.setFilterFromPath(false);
-    for (int i = 0; i < urls.length; i++) {
-      Assert.assertTrue(urlsModeAcceptAndNonPathFilter[i] == filter
-          .filter(urls[i]));
-    }
-  }
-
-  @Test
-  public void testModeAcceptAndPathFilter() {
-    filter.setModeAccept(true);
-    filter.setFilterFromPath(true);
-    for (int i = 0; i < urls.length; i++) {
-      Assert.assertTrue(urlsModeAcceptAndPathFilter[i] == filter
-          .filter(urls[i]));
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-validator/src/test/java/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-validator/src/test/java/org/apache/nutch/urlfilter/validator/TestUrlValidator.java b/nutch-plugins/urlfilter-validator/src/test/java/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
new file mode 100644
index 0000000..2e6d695
--- /dev/null
+++ b/nutch-plugins/urlfilter-validator/src/test/java/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.validator;
+
+import org.apache.nutch.urlfilter.validator.UrlValidator;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * JUnit test case which tests 1. that valid urls are not filtered while invalid
+ * ones are filtered. 2. that Urls' scheme, authority, path and query are
+ * validated.
+ * 
+ * @author tejasp
+ * 
+ */
+
+public class TestUrlValidator {
+
+  /**
+   * Test method for
+   * {@link org.apache.nutch.urlfilter.validator.UrlValidator#filter(java.lang.String)}
+   * .
+   */
+  @Test
+  public void testFilter() {
+    UrlValidator url_validator = new UrlValidator();
+    Assert.assertNotNull(url_validator);
+
+    Assert.assertNull("Filtering on a null object should return null",
+        url_validator.filter(null));
+    Assert.assertNull("Invalid url: example.com/file[/].html",
+        url_validator.filter("example.com/file[/].html"));
+    Assert.assertNull("Invalid url: http://www.example.com/space here.html",
+        url_validator.filter("http://www.example.com/space here.html"));
+    Assert.assertNull("Invalid url: /main.html",
+        url_validator.filter("/main.html"));
+    Assert.assertNull("Invalid url: www.example.com/main.html",
+        url_validator.filter("www.example.com/main.html"));
+    Assert.assertNull("Invalid url: ftp:www.example.com/main.html",
+        url_validator.filter("ftp:www.example.com/main.html"));
+    Assert.assertNull(
+        "Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
+        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
+    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html",
+        url_validator.filter(" http://www.example.com/ma|in\\toc.html"));
+
+    Assert.assertNotNull(
+        "Valid url: https://issues.apache.org/jira/NUTCH-1127",
+        url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
+    Assert
+        .assertNotNull(
+            "Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&amp;name=Fonzi&amp;mood=happy&amp;coat=leather",
+            url_validator
+                .filter("http://domain.tld/function.cgi?url=http://fonzi.com/&amp;name=Fonzi&amp;mood=happy&amp;coat=leather"));
+    Assert
+        .assertNotNull(
+            "Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
+            url_validator
+                .filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
+    Assert.assertNotNull("Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf",
+        url_validator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java b/nutch-plugins/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
deleted file mode 100644
index 2e6d695..0000000
--- a/nutch-plugins/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.validator;
-
-import org.apache.nutch.urlfilter.validator.UrlValidator;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * JUnit test case which tests 1. that valid urls are not filtered while invalid
- * ones are filtered. 2. that Urls' scheme, authority, path and query are
- * validated.
- * 
- * @author tejasp
- * 
- */
-
-public class TestUrlValidator {
-
-  /**
-   * Test method for
-   * {@link org.apache.nutch.urlfilter.validator.UrlValidator#filter(java.lang.String)}
-   * .
-   */
-  @Test
-  public void testFilter() {
-    UrlValidator url_validator = new UrlValidator();
-    Assert.assertNotNull(url_validator);
-
-    Assert.assertNull("Filtering on a null object should return null",
-        url_validator.filter(null));
-    Assert.assertNull("Invalid url: example.com/file[/].html",
-        url_validator.filter("example.com/file[/].html"));
-    Assert.assertNull("Invalid url: http://www.example.com/space here.html",
-        url_validator.filter("http://www.example.com/space here.html"));
-    Assert.assertNull("Invalid url: /main.html",
-        url_validator.filter("/main.html"));
-    Assert.assertNull("Invalid url: www.example.com/main.html",
-        url_validator.filter("www.example.com/main.html"));
-    Assert.assertNull("Invalid url: ftp:www.example.com/main.html",
-        url_validator.filter("ftp:www.example.com/main.html"));
-    Assert.assertNull(
-        "Inalid url: http://999.000.456.32/nutch/trunk/README.txt",
-        url_validator.filter("http://999.000.456.32/nutch/trunk/README.txt"));
-    Assert.assertNull("Invalid url: http://www.example.com/ma|in\\toc.html",
-        url_validator.filter(" http://www.example.com/ma|in\\toc.html"));
-
-    Assert.assertNotNull(
-        "Valid url: https://issues.apache.org/jira/NUTCH-1127",
-        url_validator.filter("https://issues.apache.org/jira/NUTCH-1127"));
-    Assert
-        .assertNotNull(
-            "Valid url: http://domain.tld/function.cgi?url=http://fonzi.com/&amp;name=Fonzi&amp;mood=happy&amp;coat=leather",
-            url_validator
-                .filter("http://domain.tld/function.cgi?url=http://fonzi.com/&amp;name=Fonzi&amp;mood=happy&amp;coat=leather"));
-    Assert
-        .assertNotNull(
-            "Valid url: http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress",
-            url_validator
-                .filter("http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Ffeeds.feedburner.com%2Fperishablepress"));
-    Assert.assertNotNull("Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf",
-        url_validator.filter("ftp://alfa.bravo.pi/mike/check/plan.pdf"));
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-ajax/src/test/java/org/apache/nutch/net/urlnormalizer/ajax/TestAjaxURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-ajax/src/test/java/org/apache/nutch/net/urlnormalizer/ajax/TestAjaxURLNormalizer.java b/nutch-plugins/urlnormalizer-ajax/src/test/java/org/apache/nutch/net/urlnormalizer/ajax/TestAjaxURLNormalizer.java
new file mode 100644
index 0000000..d815c45
--- /dev/null
+++ b/nutch-plugins/urlnormalizer-ajax/src/test/java/org/apache/nutch/net/urlnormalizer/ajax/TestAjaxURLNormalizer.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.net.urlnormalizer.ajax;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+/** Unit tests for AjaxURLNormalizer. */
+public class TestAjaxURLNormalizer extends TestCase {
+  private AjaxURLNormalizer normalizer;
+  private Configuration conf;
+  
+  public TestAjaxURLNormalizer(String name) {
+    super(name);
+    normalizer = new AjaxURLNormalizer();
+    conf = NutchConfiguration.create();
+    normalizer.setConf(conf);
+  }
+
+  public void testNormalizer() throws Exception {
+    // check if AJAX URLs are normalized to an _escaped_fragment_ form
+    normalizeTest("http://example.org/#!k=v", "http://example.org/?_escaped_fragment_=k=v");
+
+    // Check with some escaped chars
+    normalizeTest("http://example.org/#!k=v&something=is wrong", "http://example.org/?_escaped_fragment_=k=v%26something=is%20wrong");
+
+    // Check with query string and multiple fragment params
+    normalizeTest("http://example.org/path.html?queryparam=queryvalue#!key1=value1&key2=value2", "http://example.org/path.html?queryparam=queryvalue&_escaped_fragment_=key1=value1%26key2=value2");
+  }
+  
+  public void testNormalizerWhenIndexing() throws Exception {
+    // check if it works the other way around
+    normalizeTest("http://example.org/?_escaped_fragment_=key=value", "http://example.org/#!key=value", URLNormalizers.SCOPE_INDEXER);
+    normalizeTest("http://example.org/?key=value&_escaped_fragment_=key=value", "http://example.org/?key=value#!key=value", URLNormalizers.SCOPE_INDEXER);
+    normalizeTest("http://example.org/page.html?key=value&_escaped_fragment_=key=value%26something=is%20wrong", "http://example.org/page.html?key=value#!key=value&something=is wrong", URLNormalizers.SCOPE_INDEXER);
+  }
+
+  private void normalizeTest(String weird, String normal) throws Exception {
+    assertEquals(normal, normalizer.normalize(weird, URLNormalizers.SCOPE_DEFAULT));
+  }
+  
+  private void normalizeTest(String weird, String normal, String scope) throws Exception {
+    assertEquals(normal, normalizer.normalize(weird, scope));
+  }
+
+  public static void main(String[] args) throws Exception {
+    new TestAjaxURLNormalizer("test").testNormalizer();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-ajax/src/test/org/apache/nutch/net/urlnormalizer/ajax/TestAjaxURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-ajax/src/test/org/apache/nutch/net/urlnormalizer/ajax/TestAjaxURLNormalizer.java b/nutch-plugins/urlnormalizer-ajax/src/test/org/apache/nutch/net/urlnormalizer/ajax/TestAjaxURLNormalizer.java
deleted file mode 100644
index d815c45..0000000
--- a/nutch-plugins/urlnormalizer-ajax/src/test/org/apache/nutch/net/urlnormalizer/ajax/TestAjaxURLNormalizer.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.net.urlnormalizer.ajax;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.net.URLNormalizers;
-import org.apache.nutch.util.NutchConfiguration;
-
-import junit.framework.TestCase;
-
-/** Unit tests for AjaxURLNormalizer. */
-public class TestAjaxURLNormalizer extends TestCase {
-  private AjaxURLNormalizer normalizer;
-  private Configuration conf;
-  
-  public TestAjaxURLNormalizer(String name) {
-    super(name);
-    normalizer = new AjaxURLNormalizer();
-    conf = NutchConfiguration.create();
-    normalizer.setConf(conf);
-  }
-
-  public void testNormalizer() throws Exception {
-    // check if AJAX URLs are normalized to an _escaped_fragment_ form
-    normalizeTest("http://example.org/#!k=v", "http://example.org/?_escaped_fragment_=k=v");
-
-    // Check with some escaped chars
-    normalizeTest("http://example.org/#!k=v&something=is wrong", "http://example.org/?_escaped_fragment_=k=v%26something=is%20wrong");
-
-    // Check with query string and multiple fragment params
-    normalizeTest("http://example.org/path.html?queryparam=queryvalue#!key1=value1&key2=value2", "http://example.org/path.html?queryparam=queryvalue&_escaped_fragment_=key1=value1%26key2=value2");
-  }
-  
-  public void testNormalizerWhenIndexing() throws Exception {
-    // check if it works the other way around
-    normalizeTest("http://example.org/?_escaped_fragment_=key=value", "http://example.org/#!key=value", URLNormalizers.SCOPE_INDEXER);
-    normalizeTest("http://example.org/?key=value&_escaped_fragment_=key=value", "http://example.org/?key=value#!key=value", URLNormalizers.SCOPE_INDEXER);
-    normalizeTest("http://example.org/page.html?key=value&_escaped_fragment_=key=value%26something=is%20wrong", "http://example.org/page.html?key=value#!key=value&something=is wrong", URLNormalizers.SCOPE_INDEXER);
-  }
-
-  private void normalizeTest(String weird, String normal) throws Exception {
-    assertEquals(normal, normalizer.normalize(weird, URLNormalizers.SCOPE_DEFAULT));
-  }
-  
-  private void normalizeTest(String weird, String normal, String scope) throws Exception {
-    assertEquals(normal, normalizer.normalize(weird, scope));
-  }
-
-  public static void main(String[] args) throws Exception {
-    new TestAjaxURLNormalizer("test").testNormalizer();
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-basic/src/test/java/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-basic/src/test/java/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java b/nutch-plugins/urlnormalizer-basic/src/test/java/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
new file mode 100644
index 0000000..9a0f8c4
--- /dev/null
+++ b/nutch-plugins/urlnormalizer-basic/src/test/java/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.net.urlnormalizer.basic;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Unit tests for BasicURLNormalizer. */
+public class TestBasicURLNormalizer {
+  private BasicURLNormalizer normalizer;
+
+  private Configuration conf;
+
+  public TestBasicURLNormalizer() {
+    normalizer = new BasicURLNormalizer();
+    conf = NutchConfiguration.create();
+    normalizer.setConf(conf);
+  }
+  
+  @Test
+  public void testNUTCH1098() throws Exception {
+    // check that % encoding is normalized
+    normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");
+
+    // check that % encoding works correctly at end of URL
+    normalizeTest("http://foo.com/%66oo.htm%6c", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/%66oo.ht%6dl", "http://foo.com/foo.html");
+
+    // check that % decoder do not overlap strings
+    normalizeTest("http://foo.com/%66oo.ht%6d%6c", "http://foo.com/foo.html");
+    
+    // check that % decoder leaves high bit chars alone
+    normalizeTest("http://foo.com/%66oo.htm%C0", "http://foo.com/foo.htm%C0");
+
+    // check that % decoder leaves control chars alone
+    normalizeTest("http://foo.com/%66oo.htm%1A", "http://foo.com/foo.htm%1A");
+
+    // check that % decoder converts to upper case letters
+    normalizeTest("http://foo.com/%66oo.htm%c0", "http://foo.com/foo.htm%C0");
+
+    // check that % decoder leaves encoded spaces alone
+    normalizeTest("http://foo.com/you%20too.html", "http://foo.com/you%20too.html");
+
+    // check that spaces are encoded into %20
+    normalizeTest("http://foo.com/you too.html", "http://foo.com/you%20too.html");
+
+    // check that encoded # are not decoded
+    normalizeTest("http://foo.com/file.html%23cz", "http://foo.com/file.html%23cz");
+
+    // check that encoded / are not decoded
+    normalizeTest("http://foo.com/fast/dir%2fcz", "http://foo.com/fast/dir%2Fcz");
+
+    // check that control chars are encoded
+    normalizeTest("http://foo.com/\u001a!", "http://foo.com/%1A!");
+
+    // check that control chars are always encoded into 2 digits
+    normalizeTest("http://foo.com/\u0001!", "http://foo.com/%01!");
+
+    // check encoding of spanish chars
+    normalizeTest("http://mydomain.com/en Espa\u00F1ol.aspx", "http://mydomain.com/en%20Espa%C3%B1ol.aspx");
+  }
+  
+  @Test
+  public void testNUTCH2064() throws Exception {
+    // Ampersand and colon and other punctuation characters are not to be unescaped
+    normalizeTest("http://x.com/s?q=a%26b&m=10", "http://x.com/s?q=a%26b&m=10");
+    normalizeTest("http://x.com/show?http%3A%2F%2Fx.com%2Fb",
+        "http://x.com/show?http%3A%2F%2Fx.com%2Fb");
+    normalizeTest("http://google.com/search?q=c%2B%2B",
+        "http://google.com/search?q=c%2B%2B");
+    // do also not touch the query part which is application/x-www-form-urlencoded
+    normalizeTest("http://x.com/s?q=a+b", "http://x.com/s?q=a+b");
+    // and keep Internationalized domain names
+    // http://bücher.de/ may be http://xn--bcher-kva.de/
+    // but definitely not http://b%C3%BCcher.de/
+    normalizeTest("http://b\u00fccher.de/", "http://b\u00fccher.de/");
+    // test whether percent-encoding works together with other normalizations
+    normalizeTest("http://x.com/./a/../%66.html", "http://x.com/f.html");
+    // [ and ] need escaping as well
+    normalizeTest("http://x.com/?x[y]=1", "http://x.com/?x%5By%5D=1");
+    // boundary test for first character outside the ASCII range (U+0080)
+    normalizeTest("http://x.com/foo\u0080", "http://x.com/foo%C2%80");
+    normalizeTest("http://x.com/foo%c2%80", "http://x.com/foo%C2%80");
+  }
+
+  @Test
+  public void testNormalizer() throws Exception {
+    // check that leading and trailing spaces are removed
+    normalizeTest(" http://foo.com/ ", "http://foo.com/");
+
+    // check that protocol is lower cased
+    normalizeTest("HTTP://foo.com/", "http://foo.com/");
+
+    // check that host is lower cased
+    normalizeTest("http://Foo.Com/index.html", "http://foo.com/index.html");
+    normalizeTest("http://Foo.Com/index.html", "http://foo.com/index.html");
+
+    // check that port number is normalized
+    normalizeTest("http://foo.com:80/index.html", "http://foo.com/index.html");
+    normalizeTest("http://foo.com:81/", "http://foo.com:81/");
+
+    // check that null path is normalized
+    normalizeTest("http://foo.com", "http://foo.com/");
+
+    // check that references are removed
+    normalizeTest("http://foo.com/foo.html#ref", "http://foo.com/foo.html");
+
+    // // check that encoding is normalized
+    // normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");
+
+    // check that unnecessary "../" are removed
+
+    normalizeTest("http://foo.com/aa/./foo.html", "http://foo.com/aa/foo.html");
+    normalizeTest("http://foo.com/aa/../", "http://foo.com/");
+    normalizeTest("http://foo.com/aa/bb/../", "http://foo.com/aa/");
+    normalizeTest("http://foo.com/aa/..", "http://foo.com/");
+    normalizeTest("http://foo.com/aa/bb/cc/../../foo.html",
+        "http://foo.com/aa/foo.html");
+    normalizeTest("http://foo.com/aa/bb/../cc/dd/../ee/foo.html",
+        "http://foo.com/aa/cc/ee/foo.html");
+    normalizeTest("http://foo.com/../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/../../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/../aa/../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/aa/../../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/aa/../bb/../foo.html/../../",
+        "http://foo.com/");
+    normalizeTest("http://foo.com/../aa/foo.html", "http://foo.com/aa/foo.html");
+    normalizeTest("http://foo.com/../aa/../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/a..a/foo.html",
+        "http://foo.com/a..a/foo.html");
+    normalizeTest("http://foo.com/a..a/../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/foo.foo/../foo.html",
+        "http://foo.com/foo.html");
+    normalizeTest("http://foo.com//aa/bb/foo.html",
+        "http://foo.com/aa/bb/foo.html");
+    normalizeTest("http://foo.com/aa//bb/foo.html",
+        "http://foo.com/aa/bb/foo.html");
+    normalizeTest("http://foo.com/aa/bb//foo.html",
+        "http://foo.com/aa/bb/foo.html");
+    normalizeTest("http://foo.com//aa//bb//foo.html",
+        "http://foo.com/aa/bb/foo.html");
+    normalizeTest("http://foo.com////aa////bb////foo.html",
+        "http://foo.com/aa/bb/foo.html");
+    normalizeTest("http://foo.com/aa?referer=http://bar.com",
+        "http://foo.com/aa?referer=http://bar.com");
+  }
+
+  private void normalizeTest(String weird, String normal) throws Exception {
+    Assert.assertEquals("normalizing: " + weird, normal,
+        normalizer.normalize(weird, URLNormalizers.SCOPE_DEFAULT));
+  }
+
+  public static void main(String[] args) throws Exception {
+    new TestBasicURLNormalizer().testNormalizer();
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java b/nutch-plugins/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
deleted file mode 100644
index 9a0f8c4..0000000
--- a/nutch-plugins/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.net.urlnormalizer.basic;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.net.URLNormalizers;
-import org.apache.nutch.util.NutchConfiguration;
-import org.junit.Assert;
-import org.junit.Test;
-
-/** Unit tests for BasicURLNormalizer. */
-public class TestBasicURLNormalizer {
-  private BasicURLNormalizer normalizer;
-
-  private Configuration conf;
-
-  public TestBasicURLNormalizer() {
-    normalizer = new BasicURLNormalizer();
-    conf = NutchConfiguration.create();
-    normalizer.setConf(conf);
-  }
-  
-  @Test
-  public void testNUTCH1098() throws Exception {
-    // check that % encoding is normalized
-    normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");
-
-    // check that % encoding works correctly at end of URL
-    normalizeTest("http://foo.com/%66oo.htm%6c", "http://foo.com/foo.html");
-    normalizeTest("http://foo.com/%66oo.ht%6dl", "http://foo.com/foo.html");
-
-    // check that % decoder do not overlap strings
-    normalizeTest("http://foo.com/%66oo.ht%6d%6c", "http://foo.com/foo.html");
-    
-    // check that % decoder leaves high bit chars alone
-    normalizeTest("http://foo.com/%66oo.htm%C0", "http://foo.com/foo.htm%C0");
-
-    // check that % decoder leaves control chars alone
-    normalizeTest("http://foo.com/%66oo.htm%1A", "http://foo.com/foo.htm%1A");
-
-    // check that % decoder converts to upper case letters
-    normalizeTest("http://foo.com/%66oo.htm%c0", "http://foo.com/foo.htm%C0");
-
-    // check that % decoder leaves encoded spaces alone
-    normalizeTest("http://foo.com/you%20too.html", "http://foo.com/you%20too.html");
-
-    // check that spaces are encoded into %20
-    normalizeTest("http://foo.com/you too.html", "http://foo.com/you%20too.html");
-
-    // check that encoded # are not decoded
-    normalizeTest("http://foo.com/file.html%23cz", "http://foo.com/file.html%23cz");
-
-    // check that encoded / are not decoded
-    normalizeTest("http://foo.com/fast/dir%2fcz", "http://foo.com/fast/dir%2Fcz");
-
-    // check that control chars are encoded
-    normalizeTest("http://foo.com/\u001a!", "http://foo.com/%1A!");
-
-    // check that control chars are always encoded into 2 digits
-    normalizeTest("http://foo.com/\u0001!", "http://foo.com/%01!");
-
-    // check encoding of spanish chars
-    normalizeTest("http://mydomain.com/en Espa\u00F1ol.aspx", "http://mydomain.com/en%20Espa%C3%B1ol.aspx");
-  }
-  
-  @Test
-  public void testNUTCH2064() throws Exception {
-    // Ampersand and colon and other punctuation characters are not to be unescaped
-    normalizeTest("http://x.com/s?q=a%26b&m=10", "http://x.com/s?q=a%26b&m=10");
-    normalizeTest("http://x.com/show?http%3A%2F%2Fx.com%2Fb",
-        "http://x.com/show?http%3A%2F%2Fx.com%2Fb");
-    normalizeTest("http://google.com/search?q=c%2B%2B",
-        "http://google.com/search?q=c%2B%2B");
-    // do also not touch the query part which is application/x-www-form-urlencoded
-    normalizeTest("http://x.com/s?q=a+b", "http://x.com/s?q=a+b");
-    // and keep Internationalized domain names
-    // http://bücher.de/ may be http://xn--bcher-kva.de/
-    // but definitely not http://b%C3%BCcher.de/
-    normalizeTest("http://b\u00fccher.de/", "http://b\u00fccher.de/");
-    // test whether percent-encoding works together with other normalizations
-    normalizeTest("http://x.com/./a/../%66.html", "http://x.com/f.html");
-    // [ and ] need escaping as well
-    normalizeTest("http://x.com/?x[y]=1", "http://x.com/?x%5By%5D=1");
-    // boundary test for first character outside the ASCII range (U+0080)
-    normalizeTest("http://x.com/foo\u0080", "http://x.com/foo%C2%80");
-    normalizeTest("http://x.com/foo%c2%80", "http://x.com/foo%C2%80");
-  }
-
-  @Test
-  public void testNormalizer() throws Exception {
-    // check that leading and trailing spaces are removed
-    normalizeTest(" http://foo.com/ ", "http://foo.com/");
-
-    // check that protocol is lower cased
-    normalizeTest("HTTP://foo.com/", "http://foo.com/");
-
-    // check that host is lower cased
-    normalizeTest("http://Foo.Com/index.html", "http://foo.com/index.html");
-    normalizeTest("http://Foo.Com/index.html", "http://foo.com/index.html");
-
-    // check that port number is normalized
-    normalizeTest("http://foo.com:80/index.html", "http://foo.com/index.html");
-    normalizeTest("http://foo.com:81/", "http://foo.com:81/");
-
-    // check that null path is normalized
-    normalizeTest("http://foo.com", "http://foo.com/");
-
-    // check that references are removed
-    normalizeTest("http://foo.com/foo.html#ref", "http://foo.com/foo.html");
-
-    // // check that encoding is normalized
-    // normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");
-
-    // check that unnecessary "../" are removed
-
-    normalizeTest("http://foo.com/aa/./foo.html", "http://foo.com/aa/foo.html");
-    normalizeTest("http://foo.com/aa/../", "http://foo.com/");
-    normalizeTest("http://foo.com/aa/bb/../", "http://foo.com/aa/");
-    normalizeTest("http://foo.com/aa/..", "http://foo.com/");
-    normalizeTest("http://foo.com/aa/bb/cc/../../foo.html",
-        "http://foo.com/aa/foo.html");
-    normalizeTest("http://foo.com/aa/bb/../cc/dd/../ee/foo.html",
-        "http://foo.com/aa/cc/ee/foo.html");
-    normalizeTest("http://foo.com/../foo.html", "http://foo.com/foo.html");
-    normalizeTest("http://foo.com/../../foo.html", "http://foo.com/foo.html");
-    normalizeTest("http://foo.com/../aa/../foo.html", "http://foo.com/foo.html");
-    normalizeTest("http://foo.com/aa/../../foo.html", "http://foo.com/foo.html");
-    normalizeTest("http://foo.com/aa/../bb/../foo.html/../../",
-        "http://foo.com/");
-    normalizeTest("http://foo.com/../aa/foo.html", "http://foo.com/aa/foo.html");
-    normalizeTest("http://foo.com/../aa/../foo.html", "http://foo.com/foo.html");
-    normalizeTest("http://foo.com/a..a/foo.html",
-        "http://foo.com/a..a/foo.html");
-    normalizeTest("http://foo.com/a..a/../foo.html", "http://foo.com/foo.html");
-    normalizeTest("http://foo.com/foo.foo/../foo.html",
-        "http://foo.com/foo.html");
-    normalizeTest("http://foo.com//aa/bb/foo.html",
-        "http://foo.com/aa/bb/foo.html");
-    normalizeTest("http://foo.com/aa//bb/foo.html",
-        "http://foo.com/aa/bb/foo.html");
-    normalizeTest("http://foo.com/aa/bb//foo.html",
-        "http://foo.com/aa/bb/foo.html");
-    normalizeTest("http://foo.com//aa//bb//foo.html",
-        "http://foo.com/aa/bb/foo.html");
-    normalizeTest("http://foo.com////aa////bb////foo.html",
-        "http://foo.com/aa/bb/foo.html");
-    normalizeTest("http://foo.com/aa?referer=http://bar.com",
-        "http://foo.com/aa?referer=http://bar.com");
-  }
-
-  private void normalizeTest(String weird, String normal) throws Exception {
-    Assert.assertEquals("normalizing: " + weird, normal,
-        normalizer.normalize(weird, URLNormalizers.SCOPE_DEFAULT));
-  }
-
-  public static void main(String[] args) throws Exception {
-    new TestBasicURLNormalizer().testNormalizer();
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-host/src/test/java/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-host/src/test/java/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java b/nutch-plugins/urlnormalizer-host/src/test/java/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java
new file mode 100644
index 0000000..c9e1a2c
--- /dev/null
+++ b/nutch-plugins/urlnormalizer-host/src/test/java/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.urlnormalizer.host;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestHostURLNormalizer {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  @Test
+  public void testHostURLNormalizer() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+
+    String hostsFile = SAMPLES + SEPARATOR + "hosts.txt";
+    HostURLNormalizer normalizer = new HostURLNormalizer(hostsFile);
+    normalizer.setConf(conf);
+
+    // Force www. sub domain when hitting link without sub domain
+    Assert.assertEquals("http://www.example.org/page.html",
+        normalizer.normalize("http://example.org/page.html",
+            URLNormalizers.SCOPE_DEFAULT));
+
+    // Force no sub domain to www. URL's
+    Assert.assertEquals("http://example.net/path/to/something.html", normalizer
+        .normalize("http://www.example.net/path/to/something.html",
+            URLNormalizers.SCOPE_DEFAULT));
+
+    // Force all sub domains to www.
+    Assert.assertEquals("http://example.com/?does=it&still=work", normalizer
+        .normalize("http://example.com/?does=it&still=work",
+            URLNormalizers.SCOPE_DEFAULT));
+    Assert.assertEquals("http://example.com/buh", normalizer.normalize(
+        "http://http.www.example.com/buh", URLNormalizers.SCOPE_DEFAULT));
+    Assert.assertEquals("http://example.com/blaat", normalizer.normalize(
+        "http://whatever.example.com/blaat", URLNormalizers.SCOPE_DEFAULT));
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-host/src/test/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-host/src/test/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java b/nutch-plugins/urlnormalizer-host/src/test/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java
deleted file mode 100644
index c9e1a2c..0000000
--- a/nutch-plugins/urlnormalizer-host/src/test/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.net.urlnormalizer.host;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.net.URLNormalizers;
-import org.apache.nutch.util.NutchConfiguration;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class TestHostURLNormalizer {
-
-  private final static String SEPARATOR = System.getProperty("file.separator");
-  private final static String SAMPLES = System.getProperty("test.data", ".");
-
-  @Test
-  public void testHostURLNormalizer() throws Exception {
-    Configuration conf = NutchConfiguration.create();
-
-    String hostsFile = SAMPLES + SEPARATOR + "hosts.txt";
-    HostURLNormalizer normalizer = new HostURLNormalizer(hostsFile);
-    normalizer.setConf(conf);
-
-    // Force www. sub domain when hitting link without sub domain
-    Assert.assertEquals("http://www.example.org/page.html",
-        normalizer.normalize("http://example.org/page.html",
-            URLNormalizers.SCOPE_DEFAULT));
-
-    // Force no sub domain to www. URL's
-    Assert.assertEquals("http://example.net/path/to/something.html", normalizer
-        .normalize("http://www.example.net/path/to/something.html",
-            URLNormalizers.SCOPE_DEFAULT));
-
-    // Force all sub domains to www.
-    Assert.assertEquals("http://example.com/?does=it&still=work", normalizer
-        .normalize("http://example.com/?does=it&still=work",
-            URLNormalizers.SCOPE_DEFAULT));
-    Assert.assertEquals("http://example.com/buh", normalizer.normalize(
-        "http://http.www.example.com/buh", URLNormalizers.SCOPE_DEFAULT));
-    Assert.assertEquals("http://example.com/blaat", normalizer.normalize(
-        "http://whatever.example.com/blaat", URLNormalizers.SCOPE_DEFAULT));
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-pass/src/test/java/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-pass/src/test/java/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java b/nutch-plugins/urlnormalizer-pass/src/test/java/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java
new file mode 100644
index 0000000..f470c62
--- /dev/null
+++ b/nutch-plugins/urlnormalizer-pass/src/test/java/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.urlnormalizer.pass;
+
+import java.net.MalformedURLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestPassURLNormalizer {
+
+  @Test
+  public void testPassURLNormalizer() {
+    Configuration conf = NutchConfiguration.create();
+
+    PassURLNormalizer normalizer = new PassURLNormalizer();
+    normalizer.setConf(conf);
+    String url = "http://www.example.com/test/..//";
+    String result = null;
+    try {
+      result = normalizer.normalize(url, URLNormalizers.SCOPE_DEFAULT);
+    } catch (MalformedURLException mue) {
+      Assert.fail(mue.toString());
+    }
+
+    Assert.assertEquals(url, result);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-pass/src/test/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-pass/src/test/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java b/nutch-plugins/urlnormalizer-pass/src/test/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java
deleted file mode 100644
index f470c62..0000000
--- a/nutch-plugins/urlnormalizer-pass/src/test/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.net.urlnormalizer.pass;
-
-import java.net.MalformedURLException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.net.URLNormalizers;
-import org.apache.nutch.util.NutchConfiguration;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class TestPassURLNormalizer {
-
-  @Test
-  public void testPassURLNormalizer() {
-    Configuration conf = NutchConfiguration.create();
-
-    PassURLNormalizer normalizer = new PassURLNormalizer();
-    normalizer.setConf(conf);
-    String url = "http://www.example.com/test/..//";
-    String result = null;
-    try {
-      result = normalizer.normalize(url, URLNormalizers.SCOPE_DEFAULT);
-    } catch (MalformedURLException mue) {
-      Assert.fail(mue.toString());
-    }
-
-    Assert.assertEquals(url, result);
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-protocol/src/test/java/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-protocol/src/test/java/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java b/nutch-plugins/urlnormalizer-protocol/src/test/java/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java
new file mode 100644
index 0000000..8880628
--- /dev/null
+++ b/nutch-plugins/urlnormalizer-protocol/src/test/java/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.urlnormalizer.protocol;
+
+import java.net.MalformedURLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+public class TestProtocolURLNormalizer extends TestCase {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  public void testProtocolURLNormalizer() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+
+    String protocolsFile = SAMPLES + SEPARATOR + "protocols.txt";
+    ProtocolURLNormalizer normalizer = new ProtocolURLNormalizer(protocolsFile);
+    normalizer.setConf(conf);
+
+    // No change
+    assertEquals("http://example.org/", normalizer.normalize("https://example.org/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.net/", normalizer.normalize("https://example.net/", URLNormalizers.SCOPE_DEFAULT));
+    
+    // https to http
+    assertEquals("http://example.org/", normalizer.normalize("https://example.org/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.net/", normalizer.normalize("https://example.net/", URLNormalizers.SCOPE_DEFAULT));
+    
+    // no change
+    assertEquals("https://example.io/", normalizer.normalize("https://example.io/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("https://example.nl/", normalizer.normalize("https://example.nl/", URLNormalizers.SCOPE_DEFAULT));
+    
+    // http to https
+    assertEquals("https://example.io/", normalizer.normalize("http://example.io/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("https://example.nl/", normalizer.normalize("http://example.nl/", URLNormalizers.SCOPE_DEFAULT));
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java b/nutch-plugins/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java
deleted file mode 100644
index 8880628..0000000
--- a/nutch-plugins/urlnormalizer-protocol/src/test/org/apache/nutch/net/urlnormalizer/protocol/TestProtocolURLNormalizer.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.net.urlnormalizer.protocol;
-
-import java.net.MalformedURLException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.net.URLNormalizers;
-import org.apache.nutch.util.NutchConfiguration;
-
-import junit.framework.TestCase;
-
-public class TestProtocolURLNormalizer extends TestCase {
-
-  private final static String SEPARATOR = System.getProperty("file.separator");
-  private final static String SAMPLES = System.getProperty("test.data", ".");
-
-  public void testProtocolURLNormalizer() throws Exception {
-    Configuration conf = NutchConfiguration.create();
-
-    String protocolsFile = SAMPLES + SEPARATOR + "protocols.txt";
-    ProtocolURLNormalizer normalizer = new ProtocolURLNormalizer(protocolsFile);
-    normalizer.setConf(conf);
-
-    // No change
-    assertEquals("http://example.org/", normalizer.normalize("https://example.org/", URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("http://example.net/", normalizer.normalize("https://example.net/", URLNormalizers.SCOPE_DEFAULT));
-    
-    // https to http
-    assertEquals("http://example.org/", normalizer.normalize("https://example.org/", URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("http://example.net/", normalizer.normalize("https://example.net/", URLNormalizers.SCOPE_DEFAULT));
-    
-    // no change
-    assertEquals("https://example.io/", normalizer.normalize("https://example.io/", URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("https://example.nl/", normalizer.normalize("https://example.nl/", URLNormalizers.SCOPE_DEFAULT));
-    
-    // http to https
-    assertEquals("https://example.io/", normalizer.normalize("http://example.io/", URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("https://example.nl/", normalizer.normalize("http://example.nl/", URLNormalizers.SCOPE_DEFAULT));
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-querystring/src/test/java/org/apache/nutch/net/urlnormalizer/querystring/TestQuerystringURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-querystring/src/test/java/org/apache/nutch/net/urlnormalizer/querystring/TestQuerystringURLNormalizer.java b/nutch-plugins/urlnormalizer-querystring/src/test/java/org/apache/nutch/net/urlnormalizer/querystring/TestQuerystringURLNormalizer.java
new file mode 100644
index 0000000..b85c55d
--- /dev/null
+++ b/nutch-plugins/urlnormalizer-querystring/src/test/java/org/apache/nutch/net/urlnormalizer/querystring/TestQuerystringURLNormalizer.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.urlnormalizer.querystring;
+
+import java.net.MalformedURLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+public class TestQuerystringURLNormalizer extends TestCase {
+
+  public void testQuerystringURLNormalizer() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+
+    QuerystringURLNormalizer normalizer = new QuerystringURLNormalizer();
+    normalizer.setConf(conf);
+
+    assertEquals("http://example.com/?a=b&c=d", normalizer.normalize(
+        "http://example.com/?c=d&a=b", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.com/a/b/c", normalizer.normalize(
+        "http://example.com/a/b/c", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.com:1234/a/b/c", normalizer.normalize(
+        "http://example.com:1234/a/b/c", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.com:1234/a/b/c#ref", normalizer.normalize(
+        "http://example.com:1234/a/b/c#ref", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.com:1234/a/b/c?a=b&c=d#ref",
+        normalizer.normalize("http://example.com:1234/a/b/c?c=d&a=b#ref",
+            URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.com/?a=b&a=c&c=d", normalizer.normalize(
+        "http://example.com/?c=d&a=b&a=c", URLNormalizers.SCOPE_DEFAULT));
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-querystring/src/test/org/apache/nutch/net/urlnormalizer/querystring/TestQuerystringURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-querystring/src/test/org/apache/nutch/net/urlnormalizer/querystring/TestQuerystringURLNormalizer.java b/nutch-plugins/urlnormalizer-querystring/src/test/org/apache/nutch/net/urlnormalizer/querystring/TestQuerystringURLNormalizer.java
deleted file mode 100644
index b85c55d..0000000
--- a/nutch-plugins/urlnormalizer-querystring/src/test/org/apache/nutch/net/urlnormalizer/querystring/TestQuerystringURLNormalizer.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.net.urlnormalizer.querystring;
-
-import java.net.MalformedURLException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.net.URLNormalizers;
-import org.apache.nutch.util.NutchConfiguration;
-
-import junit.framework.TestCase;
-
-public class TestQuerystringURLNormalizer extends TestCase {
-
-  public void testQuerystringURLNormalizer() throws Exception {
-    Configuration conf = NutchConfiguration.create();
-
-    QuerystringURLNormalizer normalizer = new QuerystringURLNormalizer();
-    normalizer.setConf(conf);
-
-    assertEquals("http://example.com/?a=b&c=d", normalizer.normalize(
-        "http://example.com/?c=d&a=b", URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("http://example.com/a/b/c", normalizer.normalize(
-        "http://example.com/a/b/c", URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("http://example.com:1234/a/b/c", normalizer.normalize(
-        "http://example.com:1234/a/b/c", URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("http://example.com:1234/a/b/c#ref", normalizer.normalize(
-        "http://example.com:1234/a/b/c#ref", URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("http://example.com:1234/a/b/c?a=b&c=d#ref",
-        normalizer.normalize("http://example.com:1234/a/b/c?c=d&a=b#ref",
-            URLNormalizers.SCOPE_DEFAULT));
-    assertEquals("http://example.com/?a=b&a=c&c=d", normalizer.normalize(
-        "http://example.com/?c=d&a=b&a=c", URLNormalizers.SCOPE_DEFAULT));
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-regex/src/test/java/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-regex/src/test/java/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java b/nutch-plugins/urlnormalizer-regex/src/test/java/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
new file mode 100644
index 0000000..cbf6c64
--- /dev/null
+++ b/nutch-plugins/urlnormalizer-regex/src/test/java/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.net.urlnormalizer.regex;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.*;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+
+/** Unit tests for RegexUrlNormalizer, driven by the sample files in "test.data". */
+public class TestRegexURLNormalizer {
+  private static final Logger LOG = LoggerFactory.getLogger(TestRegexURLNormalizer.class);
+
+  private RegexURLNormalizer normalizer;
+  private Configuration conf;
+  private Map<String, NormalizedURL[]> testData = new HashMap<String, NormalizedURL[]>();
+
+  // This system property is defined in ./src/plugin/build-plugin.xml
+  private String sampleDir = System.getProperty("test.data", ".");
+
+  // Make sure sample files are copied to "test.data" as specified in
+  // ./src/plugin/urlnormalizer-regex/build.xml during plugin compilation.
+
+  /** Loads each regex-normalize-[scope].xml rule file in sampleDir plus its matching .test samples. */
+  public TestRegexURLNormalizer() throws IOException {
+    normalizer = new RegexURLNormalizer();
+    conf = NutchConfiguration.create();
+    normalizer.setConf(conf);
+    File[] configs = new File(sampleDir).listFiles(new FileFilter() {
+      public boolean accept(File f) {
+        String name = f.getName();
+        return name.endsWith(".xml") && name.startsWith("regex-normalize-");
+      }
+    });
+    if (configs == null) // listFiles() yields null when sampleDir is not a readable directory
+      throw new IOException("Cannot list sample directory '" + sampleDir + "'");
+    for (File config : configs) {
+      try (FileReader reader = new FileReader(config)) {
+        String cname = config.getName();
+        // Scope name = file name minus the 16-char "regex-normalize-" prefix and ".xml".
+        cname = cname.substring(16, cname.indexOf(".xml"));
+        normalizer.setConfiguration(reader, cname);
+        testData.put(cname, readTestFile(cname));
+      } catch (Exception e) {
+        LOG.warn("Could not load config from '" + config + "'", e);
+      }
+    }
+  }
+
+  /** Runs the default-scope samples; fails with a message when none were loaded. */
+  @Test
+  public void testNormalizerDefault() throws Exception {
+    NormalizedURL[] urls = testData.get(URLNormalizers.SCOPE_DEFAULT);
+    Assert.assertNotNull("No test data loaded for the default scope", urls);
+    normalizeTest(urls, URLNormalizers.SCOPE_DEFAULT);
+  }
+
+  /** Runs the samples of every scope that was loaded by the constructor. */
+  @Test
+  public void testNormalizerScope() throws Exception {
+    for (Map.Entry<String, NormalizedURL[]> entry : testData.entrySet()) {
+      normalizeTest(entry.getValue(), entry.getKey());
+    }
+  }
+
+  /** Normalizes every sample URL in the given scope and asserts the expected result. */
+  private void normalizeTest(NormalizedURL[] urls, String scope) throws Exception {
+    for (NormalizedURL sample : urls) {
+      String normalized = normalizer.normalize(sample.url, scope);
+      LOG.info("scope: " + scope + " url: " + sample.url + " | normalized: "
+          + normalized + " | expected: " + sample.expectedURL);
+      Assert.assertEquals(sample.expectedURL, normalized);
+    }
+  }
+
+  /** Repeats normalizeTest "loops" times for a scope (no-op if unknown) and logs the time. */
+  private void bench(int loops, String scope) {
+    long start = System.currentTimeMillis();
+    try {
+      NormalizedURL[] expected = testData.get(scope);
+      if (expected == null)
+        return;
+      for (int i = 0; i < loops; i++) {
+        normalizeTest(expected, scope);
+      }
+    } catch (Exception e) {
+      Assert.fail(e.toString());
+    }
+    LOG.info("bench time (" + loops + ") "
+        + (System.currentTimeMillis() - start) + "ms");
+  }
+
+  /** One sample line: the input URL, whitespace, then the URL expected after normalization. */
+  private static class NormalizedURL {
+    String url;
+    String expectedURL;
+    public NormalizedURL(String line) {
+      String[] fields = line.split("\\s+");
+      url = fields[0];
+      expectedURL = fields[1];
+    }
+  }
+
+  /** Reads regex-normalize-[scope].test, skipping blank, "#" and space-prefixed lines. */
+  private NormalizedURL[] readTestFile(String scope) throws IOException {
+    File f = new File(sampleDir, "regex-normalize-" + scope + ".test");
+    List<NormalizedURL> list = new ArrayList<NormalizedURL>();
+    try (BufferedReader in = new BufferedReader(new InputStreamReader(
+        new FileInputStream(f), "UTF-8"))) {
+      String line;
+      while ((line = in.readLine()) != null) {
+        if (line.trim().length() == 0 || line.startsWith("#") || line.startsWith(" "))
+          continue;
+        list.add(new NormalizedURL(line));
+      }
+    }
+    return list.toArray(new NormalizedURL[list.size()]);
+  }
+
+  /** Command-line entry point: TestRegexURLNormalizer [-bench iter] scope. */
+  public static void main(String[] args) throws Exception {
+    if (args.length == 0) {
+      System.err.println("TestRegexURLNormalizer [-bench <iter>] <scope>");
+      System.exit(-1);
+    }
+    boolean bench = false;
+    int iter = -1;
+    String scope = null;
+    for (int i = 0; i < args.length; i++) {
+      if (args[i].equals("-bench")) {
+        bench = true;
+        if (i + 1 < args.length) // a missing count leaves iter at -1, rejected below
+          iter = Integer.parseInt(args[++i]);
+      } else
+        scope = args[i];
+    }
+    if (scope == null) {
+      System.err.println("Missing required scope name.");
+      System.exit(-1);
+    }
+    if (bench && iter < 0) {
+      System.err.println("Invalid number of iterations: " + iter);
+      System.exit(-1);
+    }
+    TestRegexURLNormalizer test = new TestRegexURLNormalizer();
+    NormalizedURL[] urls = test.testData.get(scope);
+    if (urls == null) {
+      LOG.warn("Missing test data for scope '" + scope + "', using default scope.");
+      scope = URLNormalizers.SCOPE_DEFAULT;
+      urls = test.testData.get(scope);
+    }
+    if (urls == null)
+      throw new IllegalStateException("No test data found in '" + test.sampleDir + "'");
+    if (bench)
+      test.bench(iter, scope);
+    else
+      test.normalizeTest(urls, scope);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java b/nutch-plugins/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
deleted file mode 100644
index cbf6c64..0000000
--- a/nutch-plugins/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.net.urlnormalizer.regex;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileFilter;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.*;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.net.URLNormalizers;
-import org.apache.nutch.util.NutchConfiguration;
-
-/** Unit tests for RegexUrlNormalizer. */
-public class TestRegexURLNormalizer {
-  private static final Logger LOG = LoggerFactory
-      .getLogger(TestRegexURLNormalizer.class);
-
-  private RegexURLNormalizer normalizer;
-  private Configuration conf;
-  private Map<String, NormalizedURL[]> testData = new HashMap<String, NormalizedURL[]>();
-
-  // This system property is defined in ./src/plugin/build-plugin.xml
-  private String sampleDir = System.getProperty("test.data", ".");
-
-  // Make sure sample files are copied to "test.data" as specified in
-  // ./src/plugin/urlnormalizer-regex/build.xml during plugin compilation.
-
-  public TestRegexURLNormalizer() throws IOException {
-    normalizer = new RegexURLNormalizer();
-    conf = NutchConfiguration.create();
-    normalizer.setConf(conf);
-    File[] configs = new File(sampleDir).listFiles(new FileFilter() {
-      public boolean accept(File f) {
-        if (f.getName().endsWith(".xml")
-            && f.getName().startsWith("regex-normalize-"))
-          return true;
-        return false;
-      }
-    });
-    for (int i = 0; i < configs.length; i++) {
-      try {
-        FileReader reader = new FileReader(configs[i]);
-        String cname = configs[i].getName();
-        cname = cname.substring(16, cname.indexOf(".xml"));
-        normalizer.setConfiguration(reader, cname);
-        NormalizedURL[] urls = readTestFile(cname);
-        testData.put(cname, urls);
-      } catch (Exception e) {
-        LOG.warn("Could load config from '" + configs[i] + "': " + e.toString());
-      }
-    }
-  }
-
-  @Test
-  public void testNormalizerDefault() throws Exception {
-    normalizeTest((NormalizedURL[]) testData.get(URLNormalizers.SCOPE_DEFAULT),
-        URLNormalizers.SCOPE_DEFAULT);
-  }
-
-  @Test
-  public void testNormalizerScope() throws Exception {
-    Iterator<String> it = testData.keySet().iterator();
-    while (it.hasNext()) {
-      String scope = it.next();
-      normalizeTest((NormalizedURL[]) testData.get(scope), scope);
-    }
-  }
-
-  private void normalizeTest(NormalizedURL[] urls, String scope)
-      throws Exception {
-    for (int i = 0; i < urls.length; i++) {
-      String url = urls[i].url;
-      String normalized = normalizer.normalize(urls[i].url, scope);
-      String expected = urls[i].expectedURL;
-      LOG.info("scope: " + scope + " url: " + url + " | normalized: "
-          + normalized + " | expected: " + expected);
-      Assert.assertEquals(urls[i].expectedURL, normalized);
-    }
-  }
-
-  private void bench(int loops, String scope) {
-    long start = System.currentTimeMillis();
-    try {
-      NormalizedURL[] expected = (NormalizedURL[]) testData.get(scope);
-      if (expected == null)
-        return;
-      for (int i = 0; i < loops; i++) {
-        normalizeTest(expected, scope);
-      }
-    } catch (Exception e) {
-      Assert.fail(e.toString());
-    }
-    LOG.info("bench time (" + loops + ") "
-        + (System.currentTimeMillis() - start) + "ms");
-  }
-
-  private static class NormalizedURL {
-    String url;
-    String expectedURL;
-
-    public NormalizedURL(String line) {
-      String[] fields = line.split("\\s+");
-      url = fields[0];
-      expectedURL = fields[1];
-    }
-  }
-
-  private NormalizedURL[] readTestFile(String scope) throws IOException {
-    File f = new File(sampleDir, "regex-normalize-" + scope + ".test");
-    @SuppressWarnings("resource")
-    BufferedReader in = new BufferedReader(new InputStreamReader(
-        new FileInputStream(f), "UTF-8"));
-    List<NormalizedURL> list = new ArrayList<NormalizedURL>();
-    String line;
-    while ((line = in.readLine()) != null) {
-      if (line.trim().length() == 0 || line.startsWith("#")
-          || line.startsWith(" "))
-        continue;
-      list.add(new NormalizedURL(line));
-    }
-    return (NormalizedURL[]) list.toArray(new NormalizedURL[list.size()]);
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length == 0) {
-      System.err.println("TestRegexURLNormalizer [-bench <iter>] <scope>");
-      System.exit(-1);
-    }
-    boolean bench = false;
-    int iter = -1;
-    String scope = null;
-    for (int i = 0; i < args.length; i++) {
-      if (args[i].equals("-bench")) {
-        bench = true;
-        iter = Integer.parseInt(args[++i]);
-      } else
-        scope = args[i];
-    }
-    if (scope == null) {
-      System.err.println("Missing required scope name.");
-      System.exit(-1);
-    }
-    if (bench && iter < 0) {
-      System.err.println("Invalid number of iterations: " + iter);
-      System.exit(-1);
-    }
-    TestRegexURLNormalizer test = new TestRegexURLNormalizer();
-    NormalizedURL[] urls = (NormalizedURL[]) test.testData.get(scope);
-    if (urls == null) {
-      LOG.warn("Missing test data for scope '" + scope
-          + "', using default scope.");
-      scope = URLNormalizers.SCOPE_DEFAULT;
-      urls = (NormalizedURL[]) test.testData.get(scope);
-    }
-    if (bench) {
-      test.bench(iter, scope);
-    } else {
-      test.normalizeTest(urls, scope);
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/20d28406/nutch-plugins/urlnormalizer-slash/src/test/java/org/apache/nutch/net/urlnormalizer/slash/TestSlashURLNormalizer.java
----------------------------------------------------------------------
diff --git a/nutch-plugins/urlnormalizer-slash/src/test/java/org/apache/nutch/net/urlnormalizer/slash/TestSlashURLNormalizer.java b/nutch-plugins/urlnormalizer-slash/src/test/java/org/apache/nutch/net/urlnormalizer/slash/TestSlashURLNormalizer.java
new file mode 100644
index 0000000..c3585e4
--- /dev/null
+++ b/nutch-plugins/urlnormalizer-slash/src/test/java/org/apache/nutch/net/urlnormalizer/slash/TestSlashURLNormalizer.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.urlnormalizer.slash;
+
+import java.net.MalformedURLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+public class TestSlashURLNormalizer extends TestCase {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  public void testSlashURLNormalizer() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+
+    String slashesFile = SAMPLES + SEPARATOR + "slashes.txt";
+    SlashURLNormalizer normalizer = new SlashURLNormalizer(slashesFile);
+    normalizer.setConf(conf);
+
+    // No change
+    assertEquals("http://example.org/", normalizer.normalize("http://example.org/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.net/", normalizer.normalize("http://example.net/", URLNormalizers.SCOPE_DEFAULT));
+    
+    // Don't touch base URL's
+    assertEquals("http://example.org", normalizer.normalize("http://example.org", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.net", normalizer.normalize("http://example.net", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.org/", normalizer.normalize("http://example.org/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.net/", normalizer.normalize("http://example.net/", URLNormalizers.SCOPE_DEFAULT));
+    
+    // Change
+    assertEquals("http://www.example.org/page/", normalizer.normalize("http://www.example.org/page", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://www.example.net/path/to/something", normalizer.normalize("http://www.example.net/path/to/something/", URLNormalizers.SCOPE_DEFAULT));
+    
+    // No change
+    assertEquals("http://example.org/buh/", normalizer.normalize("http://example.org/buh/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.net/blaat", normalizer.normalize("http://example.net/blaat", URLNormalizers.SCOPE_DEFAULT));
+    
+    // No change
+    assertEquals("http://example.nl/buh/", normalizer.normalize("http://example.nl/buh/", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://example.de/blaat", normalizer.normalize("http://example.de/blaat", URLNormalizers.SCOPE_DEFAULT));
+    
+    // Change
+    assertEquals("http://www.example.org/page/?a=b&c=d", normalizer.normalize("http://www.example.org/page?a=b&c=d", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://www.example.net/path/to/something?a=b&c=d", normalizer.normalize("http://www.example.net/path/to/something/?a=b&c=d", URLNormalizers.SCOPE_DEFAULT));
+    
+    // No change
+    assertEquals("http://www.example.org/noise.mp3", normalizer.normalize("http://www.example.org/noise.mp3", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://www.example.org/page.html", normalizer.normalize("http://www.example.org/page.html", URLNormalizers.SCOPE_DEFAULT));
+    assertEquals("http://www.example.org/page.shtml", normalizer.normalize("http://www.example.org/page.shtml", URLNormalizers.SCOPE_DEFAULT));
+
+    // Change
+    assertEquals("http://www.example.org/this.is.not.an_extension/", normalizer.normalize("http://www.example.org/this.is.not.an_extension", URLNormalizers.SCOPE_DEFAULT));
+  }
+}