You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/16 19:48:42 UTC
[26/51] [partial] nutch git commit: NUTCH-2292 : Mavenize the build
for nutch-core and nutch-plugins
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/java/org/apache/nutch/util/WritableTestUtils.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/java/org/apache/nutch/util/WritableTestUtils.java b/nutch-core/src/test/java/org/apache/nutch/util/WritableTestUtils.java
new file mode 100644
index 0000000..49bcfa9
--- /dev/null
+++ b/nutch-core/src/test/java/org/apache/nutch/util/WritableTestUtils.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util;
+
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.conf.*;
+import org.junit.Assert;
+
+/**
+ * Test helpers that round-trip Hadoop {@link Writable} instances through
+ * their binary serialization ({@code write} followed by {@code readFields}).
+ */
+public class WritableTestUtils {
+
+  /**
+   * Round-trips {@code before} without a configuration and asserts that the
+   * deserialized copy equals the original.
+   *
+   * @param before the writable to serialize; its class needs an accessible
+   *          no-argument constructor
+   * @throws Exception if serialization, instantiation or deserialization fails
+   */
+  public static void testWritable(Writable before) throws Exception {
+    testWritable(before, null);
+  }
+
+  /**
+   * Round-trips {@code before} and asserts that the deserialized copy equals
+   * the original.
+   *
+   * @param before the writable to serialize; its class needs an accessible
+   *          no-argument constructor
+   * @param conf configuration handed to the copy before it is read, or
+   *          {@code null} to skip that step; when non-null the writable must
+   *          implement {@link Configurable}
+   * @throws Exception if serialization, instantiation or deserialization fails
+   */
+  public static void testWritable(Writable before, Configuration conf)
+      throws Exception {
+    Assert.assertEquals(before, writeRead(before, conf));
+  }
+
+  /**
+   * Serializes {@code before} into an in-memory buffer and reads it back into
+   * a freshly created instance of the same class.
+   *
+   * @param before the writable to serialize; its class needs an accessible
+   *          no-argument constructor
+   * @param conf configuration set on the new instance before
+   *          {@code readFields} is called, or {@code null} to skip that step
+   * @return the newly created instance populated from the serialized bytes
+   * @throws ClassCastException if {@code conf} is non-null and the writable
+   *           does not implement {@link Configurable}
+   * @throws Exception if serialization, instantiation or deserialization fails
+   */
+  public static Writable writeRead(Writable before, Configuration conf)
+      throws Exception {
+
+    DataOutputBuffer dob = new DataOutputBuffer();
+    before.write(dob);
+
+    // Only the first getLength() bytes of the backing array are valid data.
+    DataInputBuffer dib = new DataInputBuffer();
+    dib.reset(dob.getData(), dob.getLength());
+
+    // Class.newInstance() is deprecated and rethrows checked constructor
+    // exceptions unchecked; go through the no-arg constructor explicitly.
+    Writable after = before.getClass().getDeclaredConstructor().newInstance();
+    if (conf != null) {
+      // The configuration must be in place before readFields() runs.
+      ((Configurable) after).setConf(conf);
+    }
+    after.readFields(dib);
+    return after;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/crawl-tests.xml
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/crawl-tests.xml b/nutch-core/src/test/resources/crawl-tests.xml
new file mode 100644
index 0000000..01fc683
--- /dev/null
+++ b/nutch-core/src/test/resources/crawl-tests.xml
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+
+<!-- Configuration overrides used during unit tests. -->
+
+<configuration>
+
+<property>
+ <name>plugin.includes</name>
+ <value>parse-tika|protocol-http|urlfilter-suffix|scoring-opic</value>
+ <description>Enable required plugins.</description>
+</property>
+
+<property>
+ <name>content.server.port</name>
+ <value>55000</value>
+ <description>Port of http server serving content.</description>
+</property>
+
+<property>
+ <name>fetcher.server.delay</name>
+ <value>0.2</value>
+ <description>The number of seconds the fetcher will delay between
+ successive requests to the same server.</description>
+</property>
+
+<property>
+ <name>http.agent.name</name>
+ <value>test-nutch</value>
+</property>
+
+<property>
+ <name>http.agent.name.check</name>
+ <value>true</value>
+</property>
+
+<property>
+ <name>http.robots.agents</name>
+ <value>test-nutch,*</value>
+ <description>The agent strings we'll look for in robots.txt files,
+ comma-separated, in decreasing order of precedence. You should
+ put the value of http.agent.name as the first agent name, and keep the
+ default * at the end of the list. E.g.: BlurflDev,Blurfl,*
+ </description>
+</property>
+
+<property>
+ <name>io.serializations</name>
+ <value>org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.JavaSerialization</value>
+ <!-- org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,
+ org.apache.hadoop.io.serializer.avro.AvroReflectSerialization,
+ org.apache.hadoop.io.serializer.avro.AvroGenericSerialization, -->
+ <description>A list of serialization classes that can be used for
+ obtaining serializers and deserializers.</description>
+</property>
+
+</configuration>
+
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/domain-urlfilter.txt
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/domain-urlfilter.txt b/nutch-core/src/test/resources/domain-urlfilter.txt
new file mode 100644
index 0000000..955700a
--- /dev/null
+++ b/nutch-core/src/test/resources/domain-urlfilter.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# config file for urlfilter-domain plugin
+
+com
+org
+net
+edu
+gov
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/dup_of_pagea.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/dup_of_pagea.html b/nutch-core/src/test/resources/fetch-test-site/dup_of_pagea.html
new file mode 100644
index 0000000..6444c41
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/dup_of_pagea.html
@@ -0,0 +1,11 @@
+<html>
+ <head>
+ <title>page a</title>
+ </head>
+<body>
+This is page a
+<a href="index.html">home</a>
+<hr>
+Nutch fetcher test page
+</body>
+</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/exception.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/exception.html b/nutch-core/src/test/resources/fetch-test-site/exception.html
new file mode 100644
index 0000000..e1192a1
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/exception.html
@@ -0,0 +1,13 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Exception</TITLE>
+<META http-equiv="Content-Type" content="text/html; charset=unicode">
+</HEAD>
+<BODY>
+!!Trying to parse this one will fail with a MalformedInputException!!
+
+Nutch fetcher test page.
+</BODY>
+</HTML>
+
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/index.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/index.html b/nutch-core/src/test/resources/fetch-test-site/index.html
new file mode 100644
index 0000000..d73ff3f
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/index.html
@@ -0,0 +1,13 @@
+<html>
+ <head>
+ <title>front page</title>
+ </head>
+<body>
+This is front page.
+<a href="pagea.html">Page a</a>
+<a href="pageb.html">Page b</a>
+<a href="dup_of_pagea.html">dup of Page a</a>
+<hr>
+Nutch fetcher test page
+</body>
+</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/nested_spider_trap.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/nested_spider_trap.html b/nutch-core/src/test/resources/fetch-test-site/nested_spider_trap.html
new file mode 100644
index 0000000..5dcf7c2
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/nested_spider_trap.html
@@ -0,0 +1,23 @@
+<html>
+<head>
+<title>nested spider trap</title>
+</head>
+
+<body>Nutch fetcher test page
+<table>
+ <tr>
+ <td>
+<i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i>
+<b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b>
+<i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i
></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b>
</i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i>
+<i><b><i><b><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></
b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></b></i></b></i>
+<b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><
b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b
><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b>
<i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><
i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i
><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i>
<b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><
b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b><
/i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></
b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i
></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b>
</i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i><
/b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></
i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b
></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i>
</b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b><
/i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i>
+</b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b>
+<i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><
i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b><i><b></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b>
</i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i></b></i>
+
+ </td>
+ </tr>
+
+</table>
+</body>
+</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/pagea.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/pagea.html b/nutch-core/src/test/resources/fetch-test-site/pagea.html
new file mode 100644
index 0000000..6444c41
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/pagea.html
@@ -0,0 +1,11 @@
+<html>
+ <head>
+ <title>page a</title>
+ </head>
+<body>
+This is page a
+<a href="index.html">home</a>
+<hr>
+Nutch fetcher test page
+</body>
+</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/pageb.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/pageb.html b/nutch-core/src/test/resources/fetch-test-site/pageb.html
new file mode 100644
index 0000000..66e3725
--- /dev/null
+++ b/nutch-core/src/test/resources/fetch-test-site/pageb.html
@@ -0,0 +1,11 @@
+<html>
+ <head>
+ <title>page b</title>
+ </head>
+<body>
+This is page b
+<a href="index.html">home</a>
+<hr>
+Nutch fetcher test page
+</body>
+</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/fetch-test-site/robots.txt
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/fetch-test-site/robots.txt b/nutch-core/src/test/resources/fetch-test-site/robots.txt
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/filter-all.txt
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/filter-all.txt b/nutch-core/src/test/resources/filter-all.txt
new file mode 100644
index 0000000..4ed567a
--- /dev/null
+++ b/nutch-core/src/test/resources/filter-all.txt
@@ -0,0 +1,7 @@
+# Config file for urlfilter-suffix plugin
+# Filter out all URLs
+
+# case-insensitive, disallow unknown suffixes
+-I
+
+# allow these
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/log4j.properties b/nutch-core/src/test/resources/log4j.properties
new file mode 100644
index 0000000..3ff115f
--- /dev/null
+++ b/nutch-core/src/test/resources/log4j.properties
@@ -0,0 +1,7 @@
+# log4j configuration used during build and unit tests
+
+log4j.rootLogger=info,stdout
+log4j.threshold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/nutch-site.xml
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/nutch-site.xml b/nutch-core/src/test/resources/nutch-site.xml
new file mode 100644
index 0000000..dd40873
--- /dev/null
+++ b/nutch-core/src/test/resources/nutch-site.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+
+<!-- Configuration overrides used during unit tests. -->
+
+<configuration>
+
+<property>
+ <name>plugin.includes</name>
+ <value>.*</value>
+ <description>Enable all plugins during unit testing.</description>
+</property>
+
+<property>
+ <name>distributed.search.test.port</name>
+ <value>60000</value>
+ <description>TCP port used during junit testing.</description>
+</property>
+
+</configuration>
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-mime-util/test.xlsx
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-mime-util/test.xlsx b/nutch-core/src/test/resources/test-mime-util/test.xlsx
new file mode 100644
index 0000000..de33f28
Binary files /dev/null and b/nutch-core/src/test/resources/test-mime-util/test.xlsx differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.data.crc
new file mode 100644
index 0000000..c321777
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.data.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.index.crc
new file mode 100644
index 0000000..5c5d11f
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/.index.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/data
new file mode 100644
index 0000000..0f8d263
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/data differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/index
new file mode 100644
index 0000000..4dfeaec
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/content/part-00000/index differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.data.crc
new file mode 100644
index 0000000..c4d315a
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.data.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.index.crc
new file mode 100644
index 0000000..6dd171e
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/.index.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/data
new file mode 100644
index 0000000..66b1f8d
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/data differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/index
new file mode 100644
index 0000000..ad4ed47
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_fetch/part-00000/index differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/.part-00000.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/.part-00000.crc b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/.part-00000.crc
new file mode 100644
index 0000000..8d5ffa4
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/.part-00000.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/part-00000
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/part-00000 b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/part-00000
new file mode 100644
index 0000000..41ef146
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_generate/part-00000 differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/.part-00000.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/.part-00000.crc b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/.part-00000.crc
new file mode 100644
index 0000000..683a1dd
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/.part-00000.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/part-00000
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/part-00000 b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/part-00000
new file mode 100644
index 0000000..3232abf
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/crawl_parse/part-00000 differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.data.crc
new file mode 100644
index 0000000..47164ee
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.data.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.index.crc
new file mode 100644
index 0000000..a32d62d
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/.index.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/data
new file mode 100644
index 0000000..5b71a24
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/data differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/index
new file mode 100644
index 0000000..d931103
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_data/part-00000/index differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.data.crc
new file mode 100644
index 0000000..53c925c
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.data.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.index.crc
new file mode 100644
index 0000000..5ba878c
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/.index.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/data
new file mode 100644
index 0000000..b58f97f
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/data differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/index
new file mode 100644
index 0000000..9880a27
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101625/parse_text/part-00000/index differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.data.crc
new file mode 100644
index 0000000..1b49819
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.data.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.index.crc
new file mode 100644
index 0000000..5aae648
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/.index.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/data
new file mode 100644
index 0000000..8069e84
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/data differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/index
new file mode 100644
index 0000000..9b19ce9
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/content/part-00000/index differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.data.crc
new file mode 100644
index 0000000..926ced1
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.data.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.index.crc
new file mode 100644
index 0000000..714a1e8
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/.index.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/data
new file mode 100644
index 0000000..f36a9fa
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/data differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/index
new file mode 100644
index 0000000..c648d89
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_fetch/part-00000/index differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/.part-00000.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/.part-00000.crc b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/.part-00000.crc
new file mode 100644
index 0000000..3ee3c94
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/.part-00000.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/part-00000
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/part-00000 b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/part-00000
new file mode 100644
index 0000000..1ef0406
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_generate/part-00000 differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/.part-00000.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/.part-00000.crc b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/.part-00000.crc
new file mode 100644
index 0000000..7948825
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/.part-00000.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/part-00000
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/part-00000 b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/part-00000
new file mode 100644
index 0000000..3a83a82
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/crawl_parse/part-00000 differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.data.crc
new file mode 100644
index 0000000..b46b6f6
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.data.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.index.crc
new file mode 100644
index 0000000..18766e6
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/.index.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/data
new file mode 100644
index 0000000..9a1f284
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/data differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/index
new file mode 100644
index 0000000..47fb983
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_data/part-00000/index differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.data.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.data.crc b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.data.crc
new file mode 100644
index 0000000..ceada1b
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.data.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.index.crc
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.index.crc b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.index.crc
new file mode 100644
index 0000000..b756b5c
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/.index.crc differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/data
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/data b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/data
new file mode 100644
index 0000000..ad96df0
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/data differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/index
----------------------------------------------------------------------
diff --git a/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/index b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/index
new file mode 100644
index 0000000..a3e1d8d
Binary files /dev/null and b/nutch-core/src/test/resources/test-segments/20150309101656/parse_text/part-00000/index differ
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/build-plugin.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/build-plugin.xml b/nutch-plugins/build-plugin.xml
new file mode 100755
index 0000000..c759d5f
--- /dev/null
+++ b/nutch-plugins/build-plugin.xml
@@ -0,0 +1,255 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- Imported by plugin build.xml files to define default targets. -->
+<project xmlns:ivy="antlib:org.apache.ivy.ant">
+
+ <property name="name" value="${ant.project.name}"/>
+ <property name="root" value="${basedir}"/>
+
+ <!-- load plugin-specific properties first -->
+ <property file="${user.home}/${name}.build.properties" />
+ <property file="${root}/build.properties" />
+
+ <property name="nutch.root" location="${root}/../../../"/>
+
+ <property name="src.dir" location="${root}/src/java"/>
+ <property name="src.test" location="${root}/src/test"/>
+
+ <available file="${src.test}" type="dir" property="test.available"/>
+
+ <property name="conf.dir" location="${nutch.root}/conf"/>
+
+ <property name="build.dir" location="${nutch.root}/build/${name}"/>
+ <property name="build.classes" location="${build.dir}/classes"/>
+ <property name="build.test" location="${build.dir}/test"/>
+ <property name="build.test.lib" location="${build.test}/lib"/>
+
+ <property name="deploy.dir" location="${nutch.root}/build/plugins/${name}"/>
+
+ <!-- load nutch defaults last so that they can be overridden above -->
+ <property file="${nutch.root}/default.properties" />
+
+ <ivy:settings id="ivy.instance" file="${nutch.root}/ivy/ivysettings.xml" />
+
+ <path id="plugin.deps"/>
+
+ <fileset id="lib.jars" dir="${root}" includes="lib/*.jar"/>
+
+ <!-- the normal classpath -->
+ <path id="classpath">
+ <pathelement location="${build.classes}"/>
+ <fileset refid="lib.jars"/>
+ <pathelement location="${nutch.root}/build/classes"/>
+ <fileset dir="${nutch.root}/build/lib">
+ <include name="*.jar" />
+ </fileset>
+ <path refid="plugin.deps"/>
+ <fileset dir="${deploy.dir}">
+ <include name="*.jar" />
+ </fileset>
+ </path>
+
+ <!-- the unit test classpath -->
+ <path id="test.classpath">
+ <pathelement location="${build.test}" />
+ <pathelement location="${nutch.root}/build/test/classes"/>
+ <pathelement location="${nutch.root}/src/test"/>
+ <pathelement location="${conf.dir}"/>
+ <pathelement location="${nutch.root}/build"/>
+ <!-- test dependencies specific to current plugin -->
+ <fileset dir="${build.test.lib}">
+ <include name="*.jar" />
+ </fileset>
+ <!-- global test dependencies -->
+ <fileset dir="${nutch.root}/build/test/lib">
+ <include name="*.jar" />
+ </fileset>
+ <path refid="classpath"/>
+ </path>
+
+ <!-- ====================================================== -->
+ <!-- Stuff needed by all targets -->
+ <!-- ====================================================== -->
+ <target name="init">
+ <mkdir dir="${build.dir}"/>
+ <mkdir dir="${build.classes}"/>
+ <mkdir dir="${build.test}"/>
+ <mkdir dir="${build.test.lib}"/>
+ <mkdir dir="${deploy.dir}"/>
+
+ <antcall target="init-plugin"/>
+ </target>
+
+ <!-- to be overridden by sub-projects -->
+ <target name="init-plugin"/>
+
+ <!--
+ ! Used to build plugin compilation dependencies
+ ! (to be overridden by plugins)
+ !-->
+ <target name="deps-jar"/>
+
+ <!--
+ ! Used to deploy plugin runtime dependencies
+ ! (to be overridden by plugins)
+ !-->
+ <target name="deps-test"/>
+
+ <!--
+ ! Used to compile tests for plugin runtime dependencies
+ ! (to be overridden by plugins)
+ !-->
+ <target name="deps-test-compile"/>
+
+ <!-- ====================================================== -->
+ <!-- Compile the Java files -->
+ <!-- ====================================================== -->
+ <target name="compile" depends="init,deps-jar, resolve-default">
+ <echo message="Compiling plugin: ${name}"/>
+ <javac
+ encoding="${build.encoding}"
+ srcdir="${src.dir}"
+ includes="**/*.java"
+ destdir="${build.classes}"
+ debug="${javac.debug}"
+ optimize="${javac.optimize}"
+ target="${javac.version}"
+ source="${javac.version}"
+ deprecation="${javac.deprecation}">
+ <classpath refid="classpath"/>
+ </javac>
+ </target>
+
+ <target name="compile-core">
+ <ant target="compile-core" inheritall="false" dir="${nutch.root}"/>
+ <ant target="compile"/>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Make plugin .jar -->
+ <!-- ================================================================== -->
+ <!-- -->
+ <!-- ================================================================== -->
+ <target name="jar" depends="compile">
+ <jar
+ jarfile="${build.dir}/${name}.jar"
+ basedir="${build.classes}"
+ />
+ </target>
+
+ <target name="jar-core" depends="compile-core">
+ <jar
+ jarfile="${build.dir}/${name}.jar"
+ basedir="${build.classes}"
+ />
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Deploy plugin to ${deploy.dir} -->
+ <!-- ================================================================== -->
+ <!-- -->
+ <!-- ================================================================== -->
+ <target name="deploy" depends="jar, deps-test">
+ <mkdir dir="${deploy.dir}"/>
+ <copy file="plugin.xml" todir="${deploy.dir}"
+ preservelastmodified="true"/>
+ <available property="lib-available"
+ file="${build.dir}/${name}.jar"/>
+ <antcall target="copy-generated-lib"/>
+ <copy todir="${deploy.dir}" flatten="true">
+ <fileset refid="lib.jars"/>
+ </copy>
+ </target>
+
+ <target name="copy-generated-lib" if="lib-available">
+ <copy file="${build.dir}/${name}.jar" todir="${deploy.dir}" failonerror="false"/>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Compile test code -->
+ <!-- ================================================================== -->
+ <target name="compile-test" depends="compile, deps-test-compile" if="test.available">
+ <javac
+ encoding="${build.encoding}"
+ srcdir="${src.test}"
+ includes="**/*.java"
+ destdir="${build.test}"
+ debug="${javac.debug}"
+ optimize="${javac.optimize}"
+ target="${javac.version}"
+ source="${javac.version}"
+ deprecation="${javac.deprecation}">
+ <classpath refid="test.classpath"/>
+ </javac>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Run unit tests -->
+ <!-- ================================================================== -->
+ <target name="test" depends="compile-test, deploy" if="test.available">
+ <echo message="Testing plugin: ${name}"/>
+
+ <junit printsummary="yes" haltonfailure="no" fork="yes"
+ errorProperty="tests.failed" failureProperty="tests.failed">
+ <sysproperty key="test.data" value="${build.test}/data"/>
+ <sysproperty key="test.input" value="${root}/data"/>
+ <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
+ <classpath refid="test.classpath"/>
+ <formatter type="${test.junit.output.format}" />
+ <batchtest todir="${build.test}" unless="testcase">
+ <fileset dir="${src.test}"
+ includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+ </batchtest>
+ <batchtest todir="${build.test}" if="testcase">
+ <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+ </batchtest>
+ </junit>
+
+ <fail if="tests.failed">Tests failed!</fail>
+
+ </target>
+
+ <!-- target: resolve ================================================= -->
+ <target name="resolve-default" depends="clean-lib" description="resolve and retrieve dependencies with ivy">
+ <ivy:resolve file="ivy.xml" conf="default" log="download-only"/>
+ <ivy:retrieve pattern="${deploy.dir}/[artifact]-[revision].[ext]" symlink="false" log="quiet"/>
+ </target>
+
+ <target name="resolve-test" depends="clean-lib" description="resolve and retrieve dependencies with ivy">
+ <ivy:resolve file="ivy.xml" conf="test" log="download-only"/>
+ <ivy:retrieve pattern="${build.test.lib}/[artifact]-[revision].[ext]" symlink="false" log="quiet"/>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Clean. Delete the build files, and their directories -->
+ <!-- ================================================================== -->
+ <!-- target: clean =================================================== -->
+ <target name="clean" depends="clean-build, clean-lib" description="--> clean the project" />
+
+ <!-- target: clean-lib =============================================== -->
+ <target name="clean-lib" description="--> clean the project libraries directory (dependencies)">
+ <delete includeemptydirs="true" dir="${build.lib.dir}"/>
+ </target>
+
+ <!-- target: clean-build ============================================= -->
+ <target name="clean-build" description="--> clean the project built files">
+ <delete includeemptydirs="true" dir="${build.dir}"/>
+ <delete includeemptydirs="true" dir="${deploy.dir}"/>
+ </target>
+
+</project>
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/build.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/build.xml b/nutch-plugins/build.xml
new file mode 100755
index 0000000..75ae2e7
--- /dev/null
+++ b/nutch-plugins/build.xml
@@ -0,0 +1,213 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="Nutch" default="deploy-core" basedir=".">
+
+ <target name="deploy-core">
+ <ant target="compile-core" inheritall="false" dir="../.."/>
+ <ant target="deploy"/>
+ </target>
+
+ <!-- ====================================================== -->
+ <!-- Build & deploy all the plugin jars. -->
+ <!-- ====================================================== -->
+ <target name="deploy">
+ <ant dir="creativecommons" target="deploy"/>
+ <ant dir="feed" target="deploy"/>
+ <ant dir="headings" target="deploy"/>
+ <ant dir="index-basic" target="deploy"/>
+ <ant dir="index-anchor" target="deploy"/>
+ <ant dir="index-geoip" target="deploy"/>
+ <ant dir="index-more" target="deploy"/>
+ <ant dir="index-replace" target="deploy"/>
+ <ant dir="index-static" target="deploy"/>
+ <ant dir="index-metadata" target="deploy"/>
+ <ant dir="index-links" target="deploy"/>
+ <ant dir="mimetype-filter" target="deploy"/>
+ <ant dir="indexer-cloudsearch" target="deploy"/>
+ <ant dir="indexer-dummy" target="deploy"/>
+ <ant dir="indexer-elastic" target="deploy"/>
+ <ant dir="indexer-solr" target="deploy"/>
+ <ant dir="language-identifier" target="deploy"/>
+ <ant dir="lib-http" target="deploy"/>
+ <ant dir="lib-nekohtml" target="deploy"/>
+ <ant dir="lib-regex-filter" target="deploy"/>
+ <ant dir="lib-xml" target="deploy"/>
+ <ant dir="microformats-reltag" target="deploy"/>
+ <ant dir="nutch-extensionpoints" target="deploy"/>
+ <ant dir="protocol-file" target="deploy"/>
+ <ant dir="protocol-ftp" target="deploy"/>
+ <ant dir="protocol-http" target="deploy"/>
+ <ant dir="protocol-httpclient" target="deploy"/>
+ <ant dir="lib-htmlunit" target="deploy"/>
+ <ant dir="protocol-htmlunit" target="deploy" />
+ <ant dir="lib-selenium" target="deploy"/>
+ <ant dir="protocol-selenium" target="deploy" />
+ <ant dir="protocol-interactiveselenium" target="deploy" />
+ <ant dir="parse-ext" target="deploy"/>
+ <ant dir="parse-js" target="deploy"/>
+ <ant dir="parse-html" target="deploy"/>
+ <ant dir="parse-metatags" target="deploy"/>
+ <ant dir="parse-swf" target="deploy"/>
+ <ant dir="parse-tika" target="deploy"/>
+ <ant dir="parse-zip" target="deploy"/>
+ <ant dir="scoring-depth" target="deploy"/>
+ <ant dir="scoring-opic" target="deploy"/>
+ <ant dir="scoring-link" target="deploy"/>
+ <ant dir="scoring-similarity" target="deploy"/>
+ <ant dir="subcollection" target="deploy"/>
+ <ant dir="tld" target="deploy"/>
+ <ant dir="urlfilter-automaton" target="deploy"/>
+ <ant dir="urlfilter-domain" target="deploy" />
+ <ant dir="urlfilter-domainblacklist" target="deploy" />
+ <ant dir="urlfilter-prefix" target="deploy"/>
+ <ant dir="urlfilter-regex" target="deploy"/>
+ <ant dir="urlfilter-suffix" target="deploy"/>
+ <ant dir="urlfilter-validator" target="deploy"/>
+ <ant dir="urlfilter-ignoreexempt" target="deploy"/>
+ <ant dir="parsefilter-naivebayes" target="deploy"/>
+ <ant dir="parsefilter-regex" target="deploy"/>
+ <ant dir="urlmeta" target="deploy"/>
+ <ant dir="urlnormalizer-ajax" target="deploy"/>
+ <ant dir="urlnormalizer-basic" target="deploy"/>
+ <ant dir="urlnormalizer-host" target="deploy"/>
+ <ant dir="urlnormalizer-pass" target="deploy"/>
+ <ant dir="urlnormalizer-protocol" target="deploy"/>
+ <ant dir="urlnormalizer-querystring" target="deploy"/>
+ <ant dir="urlnormalizer-regex" target="deploy"/>
+ <ant dir="urlnormalizer-slash" target="deploy"/>
+ </target>
+
+ <!-- ====================================================== -->
+ <!-- Test all of the plugins. -->
+ <!-- ====================================================== -->
+ <target name="test">
+ <parallel threadCount="2">
+ <ant dir="creativecommons" target="test"/>
+ <ant dir="index-basic" target="test"/>
+ <ant dir="index-anchor" target="test"/>
+ <ant dir="index-geoip" target="test"/>
+ <ant dir="index-more" target="test"/>
+ <ant dir="index-static" target="test"/>
+ <ant dir="index-replace" target="test"/>
+ <ant dir="index-links" target="test"/>
+ <ant dir="mimetype-filter" target="test"/>
+ <ant dir="language-identifier" target="test"/>
+ <ant dir="lib-http" target="test"/>
+ <ant dir="protocol-file" target="test"/>
+ <ant dir="protocol-http" target="test"/>
+ <ant dir="protocol-httpclient" target="test"/>
+ <!--ant dir="parse-ext" target="test"/-->
+ <ant dir="feed" target="test"/>
+ <ant dir="parse-html" target="test"/>
+ <ant dir="parse-metatags" target="test"/>
+ <ant dir="parse-swf" target="test"/>
+ <ant dir="parse-tika" target="test"/>
+ <ant dir="parse-zip" target="test"/>
+ <ant dir="parsefilter-regex" target="test"/>
+ <ant dir="subcollection" target="test"/>
+ <ant dir="urlfilter-automaton" target="test"/>
+ <ant dir="urlfilter-domain" target="test"/>
+ <ant dir="urlfilter-domainblacklist" target="test"/>
+ <ant dir="urlfilter-prefix" target="test"/>
+ <ant dir="urlfilter-regex" target="test"/>
+ <ant dir="urlfilter-suffix" target="test"/>
+ <ant dir="urlfilter-validator" target="test"/>
+ <ant dir="urlfilter-ignoreexempt" target="test"/>
+ <ant dir="urlnormalizer-ajax" target="test"/>
+ <ant dir="urlnormalizer-basic" target="test"/>
+ <ant dir="urlnormalizer-host" target="test"/>
+ <ant dir="urlnormalizer-pass" target="test"/>
+ <ant dir="urlnormalizer-protocol" target="test"/>
+ <ant dir="urlnormalizer-querystring" target="test"/>
+ <ant dir="urlnormalizer-regex" target="test"/>
+ <ant dir="urlnormalizer-slash" target="test"/>
+ </parallel>
+ </target>
+
+ <!-- ====================================================== -->
+ <!-- Clean all of the plugins. -->
+ <!-- ====================================================== -->
+ <target name="clean">
+ <ant dir="creativecommons" target="clean"/>
+ <ant dir="feed" target="clean"/>
+ <ant dir="headings" target="clean"/>
+ <ant dir="index-basic" target="clean"/>
+ <ant dir="index-anchor" target="clean"/>
+ <ant dir="index-geoip" target="clean"/>
+ <ant dir="index-more" target="clean"/>
+ <ant dir="index-static" target="clean"/>
+ <ant dir="index-replace" target="clean"/>
+ <ant dir="index-metadata" target="clean"/>
+ <ant dir="index-links" target="clean"/>
+ <ant dir="mimetype-filter" target="clean"/>
+ <ant dir="indexer-cloudsearch" target="clean"/>
+ <ant dir="indexer-dummy" target="clean"/>
+ <ant dir="indexer-elastic" target="clean"/>
+ <ant dir="indexer-solr" target="clean"/>
+ <ant dir="language-identifier" target="clean"/>
+ <!-- <ant dir="lib-commons-httpclient" target="clean"/> -->
+ <ant dir="lib-http" target="clean"/>
+ <!-- <ant dir="lib-lucene-analyzers" target="clean"/>-->
+ <ant dir="lib-nekohtml" target="clean"/>
+ <ant dir="lib-regex-filter" target="clean"/>
+ <ant dir="lib-xml" target="clean"/>
+ <ant dir="microformats-reltag" target="clean"/>
+ <ant dir="nutch-extensionpoints" target="clean"/>
+ <ant dir="protocol-file" target="clean"/>
+ <ant dir="protocol-ftp" target="clean"/>
+ <ant dir="protocol-http" target="clean"/>
+ <ant dir="protocol-httpclient" target="clean"/>
+ <ant dir="lib-htmlunit" target="clean"/>
+ <ant dir="protocol-htmlunit" target="clean" />
+ <ant dir="lib-selenium" target="clean"/>
+ <ant dir="protocol-selenium" target="clean" />
+ <ant dir="protocol-interactiveselenium" target="clean" />
+ <ant dir="parse-ext" target="clean"/>
+ <ant dir="parse-js" target="clean"/>
+ <ant dir="parse-html" target="clean"/>
+ <ant dir="parse-metatags" target="clean"/>
+ <ant dir="parse-swf" target="clean"/>
+ <ant dir="parse-tika" target="clean"/>
+ <ant dir="parse-zip" target="clean"/>
+ <ant dir="parsefilter-regex" target="clean"/>
+ <ant dir="scoring-depth" target="clean"/>
+ <ant dir="scoring-opic" target="clean"/>
+ <ant dir="scoring-link" target="clean"/>
+ <ant dir="scoring-similarity" target="clean"/>
+ <ant dir="subcollection" target="clean"/>
+ <ant dir="tld" target="clean"/>
+ <ant dir="urlfilter-automaton" target="clean"/>
+ <ant dir="urlfilter-domain" target="clean" />
+ <ant dir="urlfilter-domainblacklist" target="clean" />
+ <ant dir="urlfilter-prefix" target="clean"/>
+ <ant dir="urlfilter-regex" target="clean"/>
+ <ant dir="urlfilter-suffix" target="clean"/>
+ <ant dir="urlfilter-validator" target="clean"/>
+ <ant dir="urlfilter-ignoreexempt" target="clean"/>
+ <ant dir="parsefilter-naivebayes" target="clean" />
+ <ant dir="urlmeta" target="clean"/>
+ <ant dir="urlnormalizer-ajax" target="clean"/>
+ <ant dir="urlnormalizer-basic" target="clean"/>
+ <ant dir="urlnormalizer-host" target="clean"/>
+ <ant dir="urlnormalizer-pass" target="clean"/>
+ <ant dir="urlnormalizer-protocol" target="clean"/>
+ <ant dir="urlnormalizer-querystring" target="clean"/>
+ <ant dir="urlnormalizer-regex" target="clean"/>
+ <ant dir="urlnormalizer-slash" target="clean"/>
+ </target>
+</project>
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/README.txt
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/README.txt b/nutch-plugins/creativecommons/README.txt
new file mode 100644
index 0000000..d4d7b65
--- /dev/null
+++ b/nutch-plugins/creativecommons/README.txt
@@ -0,0 +1 @@
+Support for crawling and searching Creative-Commons licensed content.
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/build.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/build.xml b/nutch-plugins/creativecommons/build.xml
new file mode 100755
index 0000000..6443d7f
--- /dev/null
+++ b/nutch-plugins/creativecommons/build.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="creativecommons" default="jar-core">
+
+ <import file="../build-plugin.xml"/>
+
+ <!-- Deploy Unit test dependencies -->
+ <target name="deps-test">
+ <ant target="deploy" inheritall="false" dir="../nutch-extensionpoints"/>
+ <!-- <ant target="deploy" inheritall="false" dir="../parse-html"/> -->
+ </target>
+
+</project>
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/conf/crawl-urlfilter.txt
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/conf/crawl-urlfilter.txt b/nutch-plugins/creativecommons/conf/crawl-urlfilter.txt
new file mode 100644
index 0000000..324617f
--- /dev/null
+++ b/nutch-plugins/creativecommons/conf/crawl-urlfilter.txt
@@ -0,0 +1,18 @@
+# Creative Commons crawl filter
+
+# Each non-comment, non-blank line contains a regular expression
+# prefixed by '+' or '-'. The first matching pattern in the file
+# determines whether a URL is included or ignored. If no pattern
+# matches, the URL is ignored.
+
+# skip file:, ftp:, mailto:, & https: urls
+-^(file|ftp|mailto|https):
+
+# skip image and other suffixes we can't yet parse
+-\.(gif|GIF|jpg|JPG|ico|ICO|css|sit|eps|wmf|rtf|zip|ppt|mpg|xls|gz|rpm|tgz|mov|MOV|exe|mp3|rss|xml|doc|pdf|txt|DOC|PDF|TXT)$
+
+# skip URLs containing certain characters as probable queries, etc.
+-[?*!@=]
+
+# accept anything else
++.
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/conf/nutch-site.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/conf/nutch-site.xml b/nutch-plugins/creativecommons/conf/nutch-site.xml
new file mode 100644
index 0000000..71e344b
--- /dev/null
+++ b/nutch-plugins/creativecommons/conf/nutch-site.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
+
+<!-- Creative Commons' Nutch configuration -->
+
+<nutch-conf>
+
+<property>
+ <name>http.agent.name</name>
+ <value>CreativeCommons</value>
+ <description>Our HTTP 'User-Agent' request header.</description>
+</property>
+
+<property>
+ <name>http.robots.agents</name>
+ <value>CreativeCommons,Nutch,*</value>
+ <description>The agent strings we'll look for in robots.txt files,
+ comma-separated, in decreasing order of precedence.</description>
+</property>
+
+<property>
+ <name>fetcher.server.delay</name>
+ <value>2.0</value>
+ <description>We need to be more polite than when crawling an
+ intranet that we control.</description>
+</property>
+
+<property>
+ <name>http.max.delays</name>
+ <value>3</value>
+ <description>The CC crawl visits a large number of different
+ hosts, so we should not need to delay much.</description>
+</property>
+
+<property>
+ <name>creativecommons.exclude.unlicensed</name>
+ <value>true</value>
+ <description>Exclude HTML content which does not contain a CC license.
+ </description>
+</property>
+
+<property>
+ <name>plugin.excludes</name>
+ <value>parse-(?!html).*</value>
+ <description>Exclude non-HTML content, since we don't know how to
+ find a CC license in anything but HTML.
+ </description>
+</property>
+
+</nutch-conf>
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-plugins/creativecommons/ivy.xml
----------------------------------------------------------------------
diff --git a/nutch-plugins/creativecommons/ivy.xml b/nutch-plugins/creativecommons/ivy.xml
new file mode 100644
index 0000000..1a86d68
--- /dev/null
+++ b/nutch-plugins/creativecommons/ivy.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<ivy-module version="1.0">
+ <info organisation="org.apache.nutch" module="${ant.project.name}">
+ <license name="Apache 2.0"/>
+ <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
+ <description>
+ Apache Nutch
+ </description>
+ </info>
+
+ <configurations>
+ <include file="../../..//ivy/ivy-configurations.xml"/>
+ </configurations>
+
+ <publications>
+ <!--get the artifact from our module name-->
+ <artifact conf="master"/>
+ </publications>
+
+ <dependencies>
+ </dependencies>
+
+</ivy-module>