You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/16 19:48:42 UTC

[26/51] [partial] nutch git commit: NUTCH-2292 : Mavenize the build for nutch-core and nutch-plugins

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/java/org/apache/nutch/util/WritableTestUtils.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/java/org/apache/nutch/util/WritableTestUtils.java b/nutch-core/src/test/java/org/apache/nutch/util/WritableTestUtils.java
new file mode 100644
index 0000000..49bcfa9
--- /dev/null
+++ b/nutch-core/src/test/java/org/apache/nutch/util/WritableTestUtils.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util;
+
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.conf.*;
+import org.junit.Assert;
+
+/**
+ * Test helpers that push a {@link Writable} through its own serialization and
+ * check what comes back.
+ */
+public class WritableTestUtils {
+
+  /**
+   * Asserts that {@code before} equals the instance obtained by writing it out
+   * and reading it back, without applying any configuration to the copy.
+   *
+   * @param before
+   *          the writable to serialize and compare against
+   * @throws Exception
+   *           if writing, instantiating or reading the writable fails
+   */
+  public static void testWritable(Writable before) throws Exception {
+    testWritable(before, null);
+  }
+
+  /**
+   * Asserts that {@code before} equals the instance returned by
+   * {@link #writeRead(Writable, Configuration)}.
+   *
+   * @param before
+   *          the writable to serialize and compare against
+   * @param conf
+   *          configuration handed to the re-created instance, or {@code null}
+   *          to skip that step
+   * @throws Exception
+   *           if writing, instantiating or reading the writable fails
+   */
+  public static void testWritable(Writable before, Configuration conf)
+      throws Exception {
+    Assert.assertEquals(before, writeRead(before, conf));
+  }
+
+  /**
+   * Writes {@code before} into an in-memory buffer and reads those bytes into
+   * a newly created instance of the same class.
+   *
+   * @param before
+   *          the writable to serialize; its class needs an accessible no-arg
+   *          constructor
+   * @param conf
+   *          if non-null, set on the new instance before it reads its fields
+   * @return the new instance after {@code readFields} has been called on it
+   * @throws ClassCastException
+   *           if {@code conf} is non-null and the class of {@code before} does
+   *           not implement {@link Configurable}
+   * @throws Exception
+   *           if writing, instantiating or reading the writable fails
+   */
+  public static Writable writeRead(Writable before, Configuration conf)
+      throws Exception {
+
+    DataOutputBuffer dob = new DataOutputBuffer();
+    before.write(dob);
+
+    // Only the first getLength() bytes of the output buffer are handed to the
+    // reader.
+    DataInputBuffer dib = new DataInputBuffer();
+    dib.reset(dob.getData(), dob.getLength());
+
+    // Class.newInstance() is deprecated and rethrows checked constructor
+    // exceptions unwrapped; go through the no-arg constructor instead.
+    Writable after = before.getClass().getDeclaredConstructor().newInstance();
+    if (conf != null) {
+      // The configuration must be in place before the fields are read.
+      ((Configurable) after).setConf(conf);
+    }
+    after.readFields(dib);
+    return after;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/crawl-tests.xml
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/crawl-tests.xml b/nutch-core/src/test/resources/crawl-tests.xml
new file mode 100644
index 0000000..01fc683
--- /dev/null
+++ b/nutch-core/src/test/resources/crawl-tests.xml
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+
+<!-- Configuration overrides used during unit tests. -->
+
+<configuration>
+
+<property>
+  <name>plugin.includes</name>
+  <value>parse-tika|protocol-http|urlfilter-suffix|scoring-opic</value>
+  <description>Enable required plugins.</description>
+</property>
+
+<property>
+  <name>content.server.port</name>
+  <value>55000</value>
+  <description>Port of http server serving content.</description>
+</property>
+
+<property>
+  <name>fetcher.server.delay</name>
+  <value>0.2</value>
+  <description>The number of seconds the fetcher will delay between 
+   successive requests to the same server.</description>
+</property>
+
+<property>
+  <name>http.agent.name</name>
+  <value>test-nutch</value>
+</property>
+
+<property>
+  <name>http.robots.agents</name>
+  <value>test-nutch,*</value>
+</property>
+
+<property>
+  <name>http.agent.name.check</name>
+  <value>true</value>
+</property>
+
+<property>                                                                                                                                                   
+  <name>http.robots.agents</name>                                                                                                                            
+  <value>test-nutch,*</value>                                                                                                                                
+  <description>The agent strings we'll look for in robots.txt files,                                                                                         
+  comma-separated, in decreasing order of precedence. You should                                                                                             
+  put the value of http.agent.name as the first agent name, and keep the                                                                                     
+  default * at the end of the list. E.g.: BlurflDev,Blurfl,*                                                                                                 
+  </description>                                                                                                                                             
+</property>
+
+<property>
+  <name>io.serializations</name>
+  <value>org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.JavaSerialization</value>
+  <!-- org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,
+  org.apache.hadoop.io.serializer.avro.AvroReflectSerialization,
+  org.apache.hadoop.io.serializer.avro.AvroGenericSerialization, -->
+  <description>A list of serialization classes that can be used for
+  obtaining serializers and deserializers.</description>
+</property>
+
+</configuration>
+

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/domain-urlfilter.txt
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/domain-urlfilter.txt b/nutch-core/src/test/resources/domain-urlfilter.txt
new file mode 100644
index 0000000..955700a
--- /dev/null
+++ b/nutch-core/src/test/resources/domain-urlfilter.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# config file for urlfilter-domain plugin
+
+com
+org
+net
+edu
+gov

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/dup_of_pagea.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/dup_of_pagea.html b/nutch-core/src/test/resources/fetch-test-site/dup_of_pagea.html
new file mode 100644
index 0000000..6444c41
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/dup_of_pagea.html
@@ -0,0 +1,11 @@
+<html>
+ <head>
+  <title>page a</title>
+ </head>
+<body>
+This is page a
+<a href="index.html">home</a>
+<hr>
+Nutch fetcher test page
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/exception.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/exception.html b/nutch-core/src/test/resources/fetch-test-site/exception.html
new file mode 100644
index 0000000..e1192a1
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/exception.html
@@ -0,0 +1,13 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Exception</TITLE>
+<META http-equiv="Content-Type" content="text/html; charset=unicode">
+</HEAD>
+<BODY>
+!!Trying to parse this one will fail with a MalformedInputException!!
+
+Nutch fetcher test page.
+</BODY>
+</HTML>
+

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/index.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/index.html b/nutch-core/src/test/resources/fetch-test-site/index.html
new file mode 100644
index 0000000..d73ff3f
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/index.html
@@ -0,0 +1,13 @@
+<html>
+ <head>
+  <title>front page</title>
+ </head>
+<body>
+This is front page.
+<a href="pagea.html">Page a</a>
+<a href="pageb.html">Page b</a>
+<a href="dup_of_pagea.html">dup of Page a</a>
+<hr>
+Nutch fetcher test page
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/nested_spider_trap.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/nested_spider_trap.html b/nutch-core/src/test/resources/fetch-test-site/nested_spider_trap.html
new file mode 100644
index 0000000..5dcf7c2
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/nested_spider_trap.html
@@ -0,0 +1,23 @@
+<html>
+<head>
+<title>nested spider trap</title>
+</head>
+
+<body>Nutch fetcher test page
+<table>
+  <tr> 
+    <td>
+<i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i> 
+<b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b> 
+<i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i
 ></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b>
 </i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i> 
+<i><b><i><b><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></
 b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></b></i></b></i> 
+<b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><
 b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b
 ><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b>
 <i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><
 i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i
 ><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i>
 <b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><
 b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b><
 /i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></
 b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i
 ></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b>
 </i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i><
 /b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></
 i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b
 ></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i>
 </b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b><
 /i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i> 
+</b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b> 
+<i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><
 i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b>
 </i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i> 
+
+    </td>
+  </tr>
+ 
+</table>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/pagea.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/pagea.html b/nutch-core/src/test/resources/fetch-test-site/pagea.html
new file mode 100644
index 0000000..6444c41
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/pagea.html
@@ -0,0 +1,11 @@
+<html>
+ <head>
+  <title>page a</title>
+ </head>
+<body>
+This is page a
+<a href="index.html">home</a>
+<hr>
+Nutch fetcher test page
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/pageb.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/pageb.html b/nutch-core/src/test/resources/fetch-test-site/pageb.html
new file mode 100644
index 0000000..66e3725
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/pageb.html
@@ -0,0 +1,11 @@
+<html>
+ <head>
+  <title>page b</title>
+ </head>
+<body>
+This is page b
+<a href="index.html">home</a>
+<hr>
+Nutch fetcher test page
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/robots.txt
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/robots.txt b/nutch-core/src/test/resources/fetch-test-site/robots.txt
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/filter-all.txt
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/filter-all.txt b/nutch-core/src/test/resources/filter-all.txt
new file mode 100644
index 0000000..4ed567a
--- /dev/null
+++ b/nutch-core/src/test/resources/filter-all.txt
@@ -0,0 +1,7 @@
+# Config file for urlfilter-suffix plugin
+# Filter away all urls
+
+# case-insensitive, disallow unknown suffixes
+-I
+
+# allow these (intentionally none, so every URL is filtered out)

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/log4j.properties b/nutch-core/src/test/resources/log4j.properties
new file mode 100644
index 0000000..3ff115f
--- /dev/null
+++ b/nutch-core/src/test/resources/log4j.properties
@@ -0,0 +1,7 @@
+# log4j configuration used during build and unit tests
+
+log4j.rootLogger=info,stdout
+log4j.threshold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/nutch-site.xml
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/nutch-site.xml b/nutch-core/src/test/resources/nutch-site.xml
new file mode 100644
index 0000000..dd40873
--- /dev/null
+++ b/nutch-core/src/test/resources/nutch-site.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+
+<!-- Configuration overrides used during unit tests. -->
+
+<configuration>
+
+<property>
+  <name>plugin.includes</name>
+  <value>.*</value>
+  <description>Enable all plugins during unit testing.</description>
+</property>
+
+<property>
+  <name>distributed.search.test.port</name>
+  <value>60000</value>
+  <description>TCP port used during junit testing.</description>
+</property>
+
+</configuration>

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-mime-util/test.xlsx
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-mime-util/test.xlsx b/nutch-core/src/test/resources/test-mime-util/test.xlsx
new file mode 100644
index 0000000..de33f28
Binary files /dev/null and b/nutch-core/src/test/resources/test-mime-util/test.xlsx differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.data.crc
new file mode 100644
index 0000000..c321777
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.data.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.index.crc
new file mode 100644
index 0000000..5c5d11f
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.index.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/data
new file mode 100644
index 0000000..0f8d263
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/data differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/index
new file mode 100644
index 0000000..4dfeaec
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/index differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.data.crc
new file mode 100644
index 0000000..c4d315a
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.data.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.index.crc
new file mode 100644
index 0000000..6dd171e
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.index.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/data
new file mode 100644
index 0000000..66b1f8d
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/data differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/index
new file mode 100644
index 0000000..ad4ed47
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/index differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/.part-00000.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/.part-00000.crc b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/.part-00000.crc
new file mode 100644
index 0000000..8d5ffa4
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/.part-00000.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/part-00000
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/part-00000 b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/part-00000
new file mode 100644
index 0000000..41ef146
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/part-00000 differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/.part-00000.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/.part-00000.crc b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/.part-00000.crc
new file mode 100644
index 0000000..683a1dd
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/.part-00000.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/part-00000
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/part-00000 b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/part-00000
new file mode 100644
index 0000000..3232abf
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/part-00000 differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.data.crc
new file mode 100644
index 0000000..47164ee
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.data.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.index.crc
new file mode 100644
index 0000000..a32d62d
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.index.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/data
new file mode 100644
index 0000000..5b71a24
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/data differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/index
new file mode 100644
index 0000000..d931103
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/index differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.data.crc
new file mode 100644
index 0000000..53c925c
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.data.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.index.crc
new file mode 100644
index 0000000..5ba878c
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.index.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/data
new file mode 100644
index 0000000..b58f97f
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/data differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/index
new file mode 100644
index 0000000..9880a27
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/index differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.data.crc
new file mode 100644
index 0000000..1b49819
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.data.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.index.crc
new file mode 100644
index 0000000..5aae648
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.index.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/data
new file mode 100644
index 0000000..8069e84
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/data differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/index
new file mode 100644
index 0000000..9b19ce9
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/index differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.data.crc
new file mode 100644
index 0000000..926ced1
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.data.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.index.crc
new file mode 100644
index 0000000..714a1e8
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.index.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/data
new file mode 100644
index 0000000..f36a9fa
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/data differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/index
new file mode 100644
index 0000000..c648d89
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/index differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/.part-00000.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/.part-00000.crc b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/.part-00000.crc
new file mode 100644
index 0000000..3ee3c94
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/.part-00000.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/part-00000
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/part-00000 b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/part-00000
new file mode 100644
index 0000000..1ef0406
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/part-00000 differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/.part-00000.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/.part-00000.crc b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/.part-00000.crc
new file mode 100644
index 0000000..7948825
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/.part-00000.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/part-00000
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/part-00000 b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/part-00000
new file mode 100644
index 0000000..3a83a82
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/part-00000 differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.data.crc
new file mode 100644
index 0000000..b46b6f6
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.data.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.index.crc
new file mode 100644
index 0000000..18766e6
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.index.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/data
new file mode 100644
index 0000000..9a1f284
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/data differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/index
new file mode 100644
index 0000000..47fb983
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/index differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.data.crc
new file mode 100644
index 0000000..ceada1b
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.data.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.index.crc
new file mode 100644
index 0000000..b756b5c
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.index.crc differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/data
new file mode 100644
index 0000000..ad96df0
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/data differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/index
new file mode 100644
index 0000000..a3e1d8d
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/index differ

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/build-plugin.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/build-plugin.xml b/nutch-plugins/build-plugin.xml
new file mode 100755
index 0000000..c759d5f
--- /dev/null
+++ b/nutch-plugins/build-plugin.xml
@@ -0,0 +1,255 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- Imported by plugin build.xml files to define default targets. -->
+<project xmlns:ivy="antlib:org.apache.ivy.ant">
+
+  <property name="name" value="${ant.project.name}"/>
+  <property name="root" value="${basedir}"/>
+
+  <!-- load plugin-specific properties first -->
+  <property file="${user.home}/${name}.build.properties" />
+  <property file="${root}/build.properties" />
+
+  <property name="nutch.root" location="${root}/../../../"/>
+
+  <property name="src.dir" location="${root}/src/java"/>
+  <property name="src.test" location="${root}/src/test"/>
+
+  <available file="${src.test}" type="dir" property="test.available"/>
+
+  <property name="conf.dir" location="${nutch.root}/conf"/>
+
+  <property name="build.dir" location="${nutch.root}/build/${name}"/>
+  <property name="build.classes" location="${build.dir}/classes"/>
+  <property name="build.test" location="${build.dir}/test"/>
+  <property name="build.test.lib" location="${build.test}/lib"/>
+
+  <property name="deploy.dir" location="${nutch.root}/build/plugins/${name}"/>
+
+  <!-- load nutch defaults last so that they can be overridden above -->
+  <property file="${nutch.root}/default.properties" />
+
+  <ivy:settings id="ivy.instance" file="${nutch.root}/ivy/ivysettings.xml" />
+
+  <path id="plugin.deps"/>
+
+  <fileset id="lib.jars" dir="${root}" includes="lib/*.jar"/>
+
+  <!-- the normal classpath -->
+  <path id="classpath">
+    <pathelement location="${build.classes}"/>
+    <fileset refid="lib.jars"/>
+    <pathelement location="${nutch.root}/build/classes"/>
+    <fileset dir="${nutch.root}/build/lib">
+      <include name="*.jar" />
+    </fileset>
+    <path refid="plugin.deps"/>
+    <fileset dir="${deploy.dir}">
+      <include name="*.jar" />
+    </fileset>
+  </path>
+
+  <!-- the unit test classpath -->
+  <path id="test.classpath">
+    <pathelement location="${build.test}" />
+    <pathelement location="${nutch.root}/build/test/classes"/>
+    <pathelement location="${nutch.root}/src/test"/>
+    <pathelement location="${conf.dir}"/>
+    <pathelement location="${nutch.root}/build"/>
+    <!-- test dependencies specific to current plugin -->
+    <fileset dir="${build.test.lib}">
+      <include name="*.jar" />
+    </fileset>
+    <!-- global test dependencies -->
+    <fileset dir="${nutch.root}/build/test/lib">
+      <include name="*.jar" />
+    </fileset>
+    <path refid="classpath"/>
+  </path>
+
+  <!-- ====================================================== -->
+  <!-- Stuff needed by all targets                            -->
+  <!-- ====================================================== -->
+  <target name="init">
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${build.test}"/>
+    <mkdir dir="${build.test.lib}"/>
+    <mkdir dir="${deploy.dir}"/>
+
+    <antcall target="init-plugin"/>
+  </target>
+
+  <!-- to be overridden by sub-projects --> 
+  <target name="init-plugin"/>
+
+  <!--
+   ! Used to build plugin compilation dependencies
+   ! (to be overridden by plugins)
+   !-->
+  <target name="deps-jar"/>
+
+  <!--
+   ! Used to deploy plugin runtime dependencies
+   ! (to be overridden by plugins)
+   !-->
+  <target name="deps-test"/>
+
+  <!--
+   ! Used to compile tests for plugin runtime dependencies
+   ! (to be overridden by plugins)
+   !-->
+  <target name="deps-test-compile"/>
+
+  <!-- ====================================================== -->
+  <!-- Compile the Java files                                 -->
+  <!-- ====================================================== -->
+  <target name="compile" depends="init,deps-jar, resolve-default">
+    <echo message="Compiling plugin: ${name}"/>
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${src.dir}"
+     includes="**/*.java"
+     destdir="${build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <classpath refid="classpath"/>
+    </javac>
+  </target>
+
+  <target name="compile-core">
+    <ant target="compile-core" inheritall="false" dir="${nutch.root}"/>
+    <ant target="compile"/>
+  </target>
+  
+  <!-- ================================================================== -->
+  <!-- Make plugin .jar                                                   -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar" depends="compile">
+    <jar
+      jarfile="${build.dir}/${name}.jar"
+      basedir="${build.classes}"
+    />
+  </target>
+
+  <target name="jar-core" depends="compile-core">
+    <jar
+        jarfile="${build.dir}/${name}.jar"
+        basedir="${build.classes}"
+        />
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Deploy plugin to ${deploy.dir}                                     -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="deploy" depends="jar, deps-test">
+    <mkdir dir="${deploy.dir}"/>
+    <copy file="plugin.xml" todir="${deploy.dir}" 
+          preservelastmodified="true"/>
+    <available property="lib-available"
+                 file="${build.dir}/${name}.jar"/>
+    <antcall target="copy-generated-lib"/>
+    <copy todir="${deploy.dir}" flatten="true">
+      <fileset refid="lib.jars"/>
+    </copy>
+  </target>
+	
+  <target name="copy-generated-lib" if="lib-available">
+    <copy file="${build.dir}/${name}.jar" todir="${deploy.dir}" failonerror="false"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Compile test code                                                  --> 
+  <!-- ================================================================== -->
+  <target name="compile-test" depends="compile, deps-test-compile" if="test.available">
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${src.test}"
+     includes="**/*.java"
+     destdir="${build.test}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <classpath refid="test.classpath"/>
+    </javac>    
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     --> 
+  <!-- ================================================================== -->
+  <target name="test" depends="compile-test, deploy" if="test.available">
+    <echo message="Testing plugin: ${name}"/>
+
+    <junit printsummary="yes" haltonfailure="no" fork="yes"
+      errorProperty="tests.failed" failureProperty="tests.failed">
+      <sysproperty key="test.data" value="${build.test}/data"/>
+      <sysproperty key="test.input" value="${root}/data"/>
+      <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/> 
+      <classpath refid="test.classpath"/>
+      <formatter type="${test.junit.output.format}" />
+      <batchtest todir="${build.test}" unless="testcase">
+        <fileset dir="${src.test}"
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <batchtest todir="${build.test}" if="testcase">
+        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+
+    <fail if="tests.failed">Tests failed!</fail>
+
+  </target>   
+
+  <!-- target: resolve  ================================================= -->
+  <target name="resolve-default" depends="clean-lib" description="resolve and retrieve dependencies with ivy">
+    <ivy:resolve file="ivy.xml" conf="default" log="download-only"/>
+    <ivy:retrieve pattern="${deploy.dir}/[artifact]-[revision].[ext]" symlink="false" log="quiet"/>
+  </target>
+
+  <target name="resolve-test" depends="clean-lib" description="resolve and retrieve dependencies with ivy">
+    <ivy:resolve file="ivy.xml" conf="test" log="download-only"/>
+    <ivy:retrieve pattern="${build.test.lib}/[artifact]-[revision].[ext]" symlink="false" log="quiet"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+  <!-- target: clean  =================================================== -->
+  <target name="clean" depends="clean-build, clean-lib" description="--> clean the project" />
+
+  <!-- target: clean-lib  =============================================== -->
+  <target name="clean-lib" description="--> clean the project libraries directory (dependencies)">
+    <delete includeemptydirs="true" dir="${build.lib.dir}"/>
+  </target>
+
+  <!-- target: clean-build  ============================================= -->
+  <target name="clean-build" description="--> clean the project built files">
+    <delete includeemptydirs="true" dir="${build.dir}"/>
+    <delete includeemptydirs="true" dir="${deploy.dir}"/>
+  </target>
+
+</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/build.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/build.xml b/nutch-plugins/build.xml
new file mode 100755
index 0000000..75ae2e7
--- /dev/null
+++ b/nutch-plugins/build.xml
@@ -0,0 +1,213 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="Nutch" default="deploy-core" basedir=".">
+
+  <target name="deploy-core">
+    <ant target="compile-core" inheritall="false" dir="../.."/>
+    <ant target="deploy"/>
+  </target>
+
+  <!-- ====================================================== -->
+  <!-- Build & deploy all the plugin jars.                    -->
+  <!-- ====================================================== -->
+  <target name="deploy">
+     <ant dir="creativecommons" target="deploy"/>
+     <ant dir="feed" target="deploy"/>
+     <ant dir="headings" target="deploy"/>
+     <ant dir="index-basic" target="deploy"/>
+     <ant dir="index-anchor" target="deploy"/>
+     <ant dir="index-geoip" target="deploy"/>
+     <ant dir="index-more" target="deploy"/>
+     <ant dir="index-replace" target="deploy"/>
+     <ant dir="index-static" target="deploy"/>
+     <ant dir="index-metadata" target="deploy"/>
+     <ant dir="index-links" target="deploy"/>
+     <ant dir="mimetype-filter" target="deploy"/>
+     <ant dir="indexer-cloudsearch" target="deploy"/>
+     <ant dir="indexer-dummy" target="deploy"/>
+     <ant dir="indexer-elastic" target="deploy"/>
+     <ant dir="indexer-solr" target="deploy"/>
+     <ant dir="language-identifier" target="deploy"/>
+     <ant dir="lib-http" target="deploy"/>
+     <ant dir="lib-nekohtml" target="deploy"/>
+     <ant dir="lib-regex-filter" target="deploy"/>
+     <ant dir="lib-xml" target="deploy"/>
+     <ant dir="microformats-reltag" target="deploy"/>
+     <ant dir="nutch-extensionpoints" target="deploy"/>
+     <ant dir="protocol-file" target="deploy"/>
+     <ant dir="protocol-ftp" target="deploy"/>
+     <ant dir="protocol-http" target="deploy"/>
+     <ant dir="protocol-httpclient" target="deploy"/>
+     <ant dir="lib-htmlunit" target="deploy"/>
+     <ant dir="protocol-htmlunit" target="deploy" />
+     <ant dir="lib-selenium" target="deploy"/>
+     <ant dir="protocol-selenium" target="deploy" />
+     <ant dir="protocol-interactiveselenium" target="deploy" />
+     <ant dir="parse-ext" target="deploy"/>
+     <ant dir="parse-js" target="deploy"/>
+     <ant dir="parse-html" target="deploy"/>
+     <ant dir="parse-metatags" target="deploy"/>
+     <ant dir="parse-swf" target="deploy"/>
+     <ant dir="parse-tika" target="deploy"/>
+     <ant dir="parse-zip" target="deploy"/>
+     <ant dir="scoring-depth" target="deploy"/>
+     <ant dir="scoring-opic" target="deploy"/>
+     <ant dir="scoring-link" target="deploy"/>
+     <ant dir="scoring-similarity" target="deploy"/>
+     <ant dir="subcollection" target="deploy"/>
+     <ant dir="tld" target="deploy"/>
+     <ant dir="urlfilter-automaton" target="deploy"/>
+     <ant dir="urlfilter-domain" target="deploy" />
+     <ant dir="urlfilter-domainblacklist" target="deploy" />
+     <ant dir="urlfilter-prefix" target="deploy"/>
+     <ant dir="urlfilter-regex" target="deploy"/>
+     <ant dir="urlfilter-suffix" target="deploy"/>
+     <ant dir="urlfilter-validator" target="deploy"/>
+     <ant dir="urlfilter-ignoreexempt" target="deploy"/>
+     <ant dir="parsefilter-naivebayes" target="deploy"/>
+     <ant dir="parsefilter-regex" target="deploy"/>
+     <ant dir="urlmeta" target="deploy"/>
+     <ant dir="urlnormalizer-ajax" target="deploy"/>
+     <ant dir="urlnormalizer-basic" target="deploy"/>
+     <ant dir="urlnormalizer-host" target="deploy"/>
+     <ant dir="urlnormalizer-pass" target="deploy"/>
+     <ant dir="urlnormalizer-protocol" target="deploy"/>
+     <ant dir="urlnormalizer-querystring" target="deploy"/>
+     <ant dir="urlnormalizer-regex" target="deploy"/>
+     <ant dir="urlnormalizer-slash" target="deploy"/>
+  </target>
+
+  <!-- ====================================================== -->
+  <!-- Test all of the plugins.                               -->
+  <!-- ====================================================== -->
+  <target name="test">
+    <parallel threadCount="2">
+     <ant dir="creativecommons" target="test"/>
+     <ant dir="index-basic" target="test"/>
+     <ant dir="index-anchor" target="test"/>
+     <ant dir="index-geoip" target="test"/>
+     <ant dir="index-more" target="test"/>
+     <ant dir="index-static" target="test"/>
+     <ant dir="index-replace" target="test"/>
+     <ant dir="index-links" target="test"/>
+     <ant dir="mimetype-filter" target="test"/>
+     <ant dir="language-identifier" target="test"/>
+     <ant dir="lib-http" target="test"/>
+     <ant dir="protocol-file" target="test"/>
+     <ant dir="protocol-http" target="test"/>
+     <ant dir="protocol-httpclient" target="test"/>
+     <!--ant dir="parse-ext" target="test"/-->
+     <ant dir="feed" target="test"/>
+     <ant dir="parse-html" target="test"/>
+     <ant dir="parse-metatags" target="test"/>
+     <ant dir="parse-swf" target="test"/>
+     <ant dir="parse-tika" target="test"/>
+     <ant dir="parse-zip" target="test"/>
+     <ant dir="parsefilter-regex" target="test"/>
+     <ant dir="subcollection" target="test"/>
+     <ant dir="urlfilter-automaton" target="test"/>
+     <ant dir="urlfilter-domain" target="test"/>
+     <ant dir="urlfilter-domainblacklist" target="test"/>
+     <ant dir="urlfilter-prefix" target="test"/>
+     <ant dir="urlfilter-regex" target="test"/>
+     <ant dir="urlfilter-suffix" target="test"/>
+     <ant dir="urlfilter-validator" target="test"/>
+     <ant dir="urlfilter-ignoreexempt" target="test"/>
+     <ant dir="urlnormalizer-ajax" target="test"/>
+     <ant dir="urlnormalizer-basic" target="test"/>
+     <ant dir="urlnormalizer-host" target="test"/>
+     <ant dir="urlnormalizer-pass" target="test"/>
+     <ant dir="urlnormalizer-protocol" target="test"/>
+     <ant dir="urlnormalizer-querystring" target="test"/>
+     <ant dir="urlnormalizer-regex" target="test"/>
+     <ant dir="urlnormalizer-slash" target="test"/>
+    </parallel>
+  </target>
+
+  <!-- ====================================================== -->
+  <!-- Clean all of the plugins.                              -->
+  <!-- ====================================================== -->
+  <target name="clean">
+    <ant dir="creativecommons" target="clean"/>
+    <ant dir="feed" target="clean"/>
+    <ant dir="headings" target="clean"/>
+    <ant dir="index-basic" target="clean"/>
+    <ant dir="index-anchor" target="clean"/>
+    <ant dir="index-geoip" target="clean"/>
+    <ant dir="index-more" target="clean"/>
+    <ant dir="index-static" target="clean"/>
+    <ant dir="index-replace" target="clean"/>
+    <ant dir="index-metadata" target="clean"/>
+    <ant dir="index-links" target="clean"/>
+    <ant dir="mimetype-filter" target="clean"/>
+    <ant dir="indexer-cloudsearch" target="clean"/>
+    <ant dir="indexer-dummy" target="clean"/>
+    <ant dir="indexer-elastic" target="clean"/>
+    <ant dir="indexer-solr" target="clean"/>
+    <ant dir="language-identifier" target="clean"/>
+    <!-- <ant dir="lib-commons-httpclient" target="clean"/> -->
+    <ant dir="lib-http" target="clean"/>
+    <!-- <ant dir="lib-lucene-analyzers" target="clean"/>-->
+    <ant dir="lib-nekohtml" target="clean"/>
+    <ant dir="lib-regex-filter" target="clean"/>
+    <ant dir="lib-xml" target="clean"/>
+    <ant dir="microformats-reltag" target="clean"/>
+    <ant dir="nutch-extensionpoints" target="clean"/>
+    <ant dir="protocol-file" target="clean"/>
+    <ant dir="protocol-ftp" target="clean"/>
+    <ant dir="protocol-http" target="clean"/>
+    <ant dir="protocol-httpclient" target="clean"/>
+    <ant dir="lib-htmlunit" target="clean"/>
+    <ant dir="protocol-htmlunit" target="clean" />
+    <ant dir="lib-selenium" target="clean"/>
+    <ant dir="protocol-selenium" target="clean" />
+    <ant dir="protocol-interactiveselenium" target="clean" />
+    <ant dir="parse-ext" target="clean"/>
+    <ant dir="parse-js" target="clean"/>
+    <ant dir="parse-html" target="clean"/>
+    <ant dir="parse-metatags" target="clean"/>
+    <ant dir="parse-swf" target="clean"/>
+    <ant dir="parse-tika" target="clean"/>
+    <ant dir="parse-zip" target="clean"/>
+    <ant dir="parsefilter-regex" target="clean"/>
+    <ant dir="scoring-depth" target="clean"/>
+    <ant dir="scoring-opic" target="clean"/>
+    <ant dir="scoring-link" target="clean"/>
+    <ant dir="scoring-similarity" target="clean"/>
+    <ant dir="subcollection" target="clean"/>
+    <ant dir="tld" target="clean"/>
+    <ant dir="urlfilter-automaton" target="clean"/>
+    <ant dir="urlfilter-domain" target="clean" />
+    <ant dir="urlfilter-domainblacklist" target="clean" />
+    <ant dir="urlfilter-prefix" target="clean"/>
+    <ant dir="urlfilter-regex" target="clean"/>
+    <ant dir="urlfilter-suffix" target="clean"/>
+    <ant dir="urlfilter-validator" target="clean"/>
+    <ant dir="urlfilter-ignoreexempt" target="clean"/>
+    <ant dir="parsefilter-naivebayes" target="clean" />
+    <ant dir="urlmeta" target="clean"/>
+    <ant dir="urlnormalizer-ajax" target="clean"/>
+    <ant dir="urlnormalizer-basic" target="clean"/>
+    <ant dir="urlnormalizer-host" target="clean"/>
+    <ant dir="urlnormalizer-pass" target="clean"/>
+    <ant dir="urlnormalizer-protocol" target="clean"/>
+    <ant dir="urlnormalizer-querystring" target="clean"/>
+    <ant dir="urlnormalizer-regex" target="clean"/>
+    <ant dir="urlnormalizer-slash" target="clean"/>
+  </target>
+</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/README.txt
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/README.txt b/nutch-plugins/creativecommons/README.txt
new file mode 100644
index 0000000..d4d7b65
--- /dev/null
+++ b/nutch-plugins/creativecommons/README.txt
@@ -0,0 +1 @@
+Support for crawling and searching Creative-Commons licensed content. 

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/build.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/build.xml b/nutch-plugins/creativecommons/build.xml
new file mode 100755
index 0000000..6443d7f
--- /dev/null
+++ b/nutch-plugins/creativecommons/build.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="creativecommons" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+  <!-- Deploy Unit test dependencies -->
+  <target name="deps-test">
+    <ant target="deploy" inheritall="false" dir="../nutch-extensionpoints"/>
+   <!--  <ant target="deploy" inheritall="false" dir="../parse-html"/> -->
+  </target>
+
+</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/conf/crawl-urlfilter.txt
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/conf/crawl-urlfilter.txt b/nutch-plugins/creativecommons/conf/crawl-urlfilter.txt
new file mode 100644
index 0000000..324617f
--- /dev/null
+++ b/nutch-plugins/creativecommons/conf/crawl-urlfilter.txt
@@ -0,0 +1,18 @@
+# Creative Commons crawl filter
+
+# Each non-comment, non-blank line contains a regular expression
+# prefixed by '+' or '-'.  The first matching pattern in the file
+# determines whether a URL is included or ignored.  If no pattern
+# matches, the URL is ignored.
+
+# skip file:, ftp:, mailto:, & https: urls
+-^(file|ftp|mailto|https):
+
+# skip image and other suffixes we can't yet parse
+-\.(gif|GIF|jpg|JPG|ico|ICO|css|sit|eps|wmf|rtf|zip|ppt|mpg|xls|gz|rpm|tgz|mov|MOV|exe|mp3|rss|xml|doc|pdf|txt|DOC|PDF|TXT)$
+
+# skip URLs containing certain characters as probable queries, etc.
+-[?*!@=]
+
+# accept anything else
++.

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/conf/nutch-site.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/conf/nutch-site.xml b/nutch-plugins/creativecommons/conf/nutch-site.xml
new file mode 100644
index 0000000..71e344b
--- /dev/null
+++ b/nutch-plugins/creativecommons/conf/nutch-site.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
+
+<!-- Creative Commons' Nutch configuration -->
+
+<nutch-conf>
+
+<property>
+  <name>http.agent.name</name>
+  <value>CreativeCommons</value>
+  <description>Our HTTP 'User-Agent' request header.</description>
+</property>
+
+<property>
+  <name>http.robots.agents</name>
+  <value>CreativeCommons,Nutch,*</value>
+  <description>The agent strings we'll look for in robots.txt files,
+  comma-separated, in decreasing order of precedence.</description>
+</property>
+
+<property>
+  <name>fetcher.server.delay</name>
+  <value>2.0</value>
+  <description>We need to be more polite than when crawling an
+  intranet that we control.</description>
+</property>
+
+<property>
+  <name>http.max.delays</name>
+  <value>3</value>
+  <description>The CC crawl visits a large number of different
+  hosts, so we should not need to delay much.</description>
+</property>
+
+<property>
+  <name>creativecommons.exclude.unlicensed</name>
+  <value>true</value>
+  <description>Exclude HTML content which does not contain a CC license.
+  </description>
+</property>
+
+<property>
+  <name>plugin.excludes</name>
+  <value>parse-(?!html).*</value>
+  <description>Exclude non-HTML content, since we don't know how to
+  find a CC license in anything but HTML. 
+  </description>
+</property>
+
+</nutch-conf>

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/ivy.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/ivy.xml b/nutch-plugins/creativecommons/ivy.xml
new file mode 100644
index 0000000..1a86d68
--- /dev/null
+++ b/nutch-plugins/creativecommons/ivy.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" ?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.nutch" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
+    <description>
+        Apache Nutch
+    </description>
+  </info>
+
+  <configurations>
+    <include file="../../..//ivy/ivy-configurations.xml"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+
+  <dependencies>
+  </dependencies>
+  
+</ivy-module>