You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/05 22:49:03 UTC

[19/69] [abbrv] [partial] nutch git commit: Re arranged the source code as per maven conventions for build

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java
----------------------------------------------------------------------
diff --git a/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java b/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java
deleted file mode 100644
index 42cd46d..0000000
--- a/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.indexer.staticfield;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.crawl.Inlinks;
-import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.parse.ParseImpl;
-import org.apache.nutch.util.NutchConfiguration;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * JUnit test case which tests 1. that static data fields are added to a
- * document 2. that empty {@code index.static} does not add anything to the
- * document 3. that valid field:value pairs are added to the document 4. that
- * fields and values added to the document are trimmed
- * 
- * @author tejasp
- */
-
-public class TestStaticFieldIndexerTest {
-
-  Configuration conf;
-
-  Inlinks inlinks;
-  ParseImpl parse;
-  CrawlDatum crawlDatum;
-  Text url;
-  StaticFieldIndexer filter;
-
-  /**
-   * Creates a fresh configuration, filter and empty filter inputs before each
-   * test.
-   */
-  @Before
-  public void setUp() throws Exception {
-    conf = NutchConfiguration.create();
-    parse = new ParseImpl();
-    url = new Text("http://nutch.apache.org/index.html");
-    crawlDatum = new CrawlDatum();
-    inlinks = new Inlinks();
-    filter = new StaticFieldIndexer();
-  }
-
-  /**
-   * Test that empty {@code index.static} does not add anything to the document
-   * 
-   * @throws Exception
-   *           if the filter fails
-   */
-  @Test
-  public void testEmptyIndexStatic() throws Exception {
-    NutchDocument doc = runFilter();
-
-    Assert.assertNotNull(doc);
-    Assert.assertTrue("tests if no field is set for empty index.static", doc
-        .getFieldNames().isEmpty());
-  }
-
-  /**
-   * Test that valid field:value pairs are added to the document
-   * 
-   * @throws Exception
-   *           if the filter fails
-   */
-  @Test
-  public void testNormalScenario() throws Exception {
-    conf.set("index.static",
-        "field1:val1, field2    :      val2 val3     , field3, field4 :val4 , ");
-
-    assertExpectedStaticFields(runFilter());
-  }
-
-  /**
-   * Test for NUTCH-2052 custom delimiters in index.static.
-   *
-   * @throws Exception
-   *           if the filter fails
-   */
-  @Test
-  public void testCustomDelimiters() throws Exception {
-    conf.set("index.static.fieldsep", ">");
-    conf.set("index.static.keysep", "=");
-    conf.set("index.static.valuesep", "|");
-    conf.set("index.static",
-        "field1=val1>field2    =      val2|val3     >field3>field4 =val4 > ");
-
-    assertExpectedStaticFields(runFilter());
-  }
-
-  /**
-   * Test for NUTCH-2052 custom delimiters in index.static, using delimiters
-   * that are longer than one character.
-   *
-   * @throws Exception
-   *           if the filter fails
-   */
-  @Test
-  public void testCustomMulticharacterDelimiters() throws Exception {
-    conf.set("index.static.fieldsep", "\n\n");
-    conf.set("index.static.keysep", "\t\t");
-    conf.set("index.static.valuesep", "***");
-    conf.set("index.static", "field1\t\tval1\n\n" + "field2\t\tval2***val3\n\n"
-        + "field3\n\n" + "field4\t\tval4\n\n\n\n");
-
-    assertExpectedStaticFields(runFilter());
-  }
-
-  /**
-   * Applies {@link #conf} to the filter and runs it on a new, empty document.
-   * Exceptions are deliberately not caught here: letting them propagate makes
-   * the test runner report the complete stack trace.
-   *
-   * @return the document that was handed to the filter
-   * @throws Exception
-   *           if the filter fails
-   */
-  private NutchDocument runFilter() throws Exception {
-    Assert.assertNotNull(filter);
-    filter.setConf(conf);
-
-    NutchDocument doc = new NutchDocument();
-    filter.filter(doc, parse, url, crawlDatum, inlinks);
-    return doc;
-  }
-
-  /**
-   * Checks the outcome shared by the three delimiter tests: exactly three
-   * fields, with field1, field2 and field4 holding val1, val2 and val4.
-   *
-   * @param doc
-   *          the document produced by the filter
-   */
-  private static void assertExpectedStaticFields(NutchDocument doc) {
-    Assert.assertNotNull(doc);
-    Assert.assertFalse("test if doc is not empty", doc.getFieldNames()
-        .isEmpty());
-    Assert.assertEquals("test if doc has 3 fields", 3, doc.getFieldNames()
-        .size());
-    Assert.assertTrue("test if doc has field1", doc.getField("field1")
-        .getValues().contains("val1"));
-    Assert.assertTrue("test if doc has field2", doc.getField("field2")
-        .getValues().contains("val2"));
-    Assert.assertTrue("test if doc has field4", doc.getField("field4")
-        .getValues().contains("val4"));
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-cloudsearch/README.md
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-cloudsearch/README.md b/src/plugin/indexer-cloudsearch/README.md
deleted file mode 100644
index 8669682..0000000
--- a/src/plugin/indexer-cloudsearch/README.md
+++ /dev/null
@@ -1,58 +0,0 @@
-AWS CloudSearch plugin for Nutch 
-================================
-
-See [http://aws.amazon.com/cloudsearch/] for information on AWS CloudSearch.
-
-Steps to use :
-
-From runtime/local/bin
-
-* Configure the AWS credentials 
-
-Edit `~/.aws/credentials`, see [http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html] for details. Note that this should not be necessary when running Nutch on EC2.
-
-* Edit ../conf/nutch-site.xml and check that 'plugin.includes' contains 'indexer-cloudsearch'. 
-
-* (Optional) Test the indexing 
-
-`./nutch indexchecker -D doIndex=true -D cloudsearch.batch.dump=true "http://nutch.apache.org/"`
-
-If the agent name hasn't been configured in nutch-site.xml, it can be added on the command line with `-D http.agent.name=whateverValueDescribesYouBest`
-
-You should see the fields extracted for indexing printed on the console.
-
-Setting the `cloudsearch.batch.dump` parameter dumps the batch to the local temp dir. The files have the prefix "CloudSearch_", e.g. `/tmp/CloudSearch_4822180575734804454.json`. This temp file can be used as a template when defining the fields in the domain creation (see below).
-
-* Create a CloudSearch domain
-
-This can be done using the web console [https://eu-west-1.console.aws.amazon.com/cloudsearch/home?region=eu-west-1#]. You can use the temp file generated above to bootstrap the field definition. 
-
-You can also create the domain using the AWS CLI [http://docs.aws.amazon.com/cloudsearch/latest/developerguide/creating-domains.html] and the `createCSDomain.sh` example script provided. This script is merely a starting point which you should further improve and fine-tune. 
-
-Note that the creation of the domain can take some time. Once it is complete, note the document endpoint, or alternatively verify the region and domain name.
-
-* Edit ../conf/nutch-site.xml and add `cloudsearch.endpoint` and `cloudsearch.region`. 
-
-* Re-test the indexing
-
-`./nutch indexchecker -D doIndex=true "http://nutch.apache.org/"`
-
-and check in the CloudSearch console that the document has been successfully indexed.
-
-Additional parameters
-
-* `cloudsearch.batch.maxSize` \: can be used to limit the size of the batches sent to CloudSearch to N documents. Note that the default limitations still apply.
-
-* `cloudsearch.batch.dump` \: see above. Stores the JSON representation of the document batch in the local temp dir, useful for bootstrapping the index definition.
-
-Note
-
-The CloudSearchIndexWriter will log any errors while sending the batches to CloudSearch and will resume the process without breaking. This means that you might not get all the documents in the index. You should check the log files for errors. Using small batch sizes will limit the number of documents skipped in case of error.
-
-Any fields not defined in the CloudSearch domain will be ignored by the CloudSearchIndexWriter. Again, the logs will contain a trace of any field names skipped.
-
-
-
-  
-
-

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-cloudsearch/build.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-cloudsearch/build.xml b/src/plugin/indexer-cloudsearch/build.xml
deleted file mode 100644
index 852b2650bd..0000000
--- a/src/plugin/indexer-cloudsearch/build.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project name="indexer-cloudsearch" default="jar-core">
-
-  <import file="../build-plugin.xml" />
-
-</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-cloudsearch/createCSDomain.sh
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-cloudsearch/createCSDomain.sh b/src/plugin/indexer-cloudsearch/createCSDomain.sh
deleted file mode 100644
index 24fb015..0000000
--- a/src/plugin/indexer-cloudsearch/createCSDomain.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/sh
-# Example of domain configuration for CloudSearch.
-#
-# Usage: createCSDomain.sh DOMAIN_NAME
-#
-# Creates the CloudSearch domain DOMAIN_NAME with the AWS CLI and then defines
-# the index fields boost, content, digest, host, id, segment, title, tstamp
-# and url on it.
-
-DOMAIN="$1"
-
-if [ -z "$DOMAIN" ]; then
-    # Report on stderr and use a valid (0-255) non-zero exit status.
-    echo "Need to specify a domain name as argument" >&2
-    exit 1
-fi
-
-# "$DOMAIN" is quoted throughout so the argument is passed to aws unchanged.
-aws cloudsearch create-domain --domain-name "$DOMAIN"
-
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name boost --type double --sort-enabled true --facet-enabled false
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name content --type text --sort-enabled false
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name digest --type literal --sort-enabled false --facet-enabled false
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name host --type literal --sort-enabled false --facet-enabled true
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name id --type literal --sort-enabled false --facet-enabled false
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name segment --type literal --sort-enabled true --facet-enabled false
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name title --type text --sort-enabled false
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name tstamp --type date --sort-enabled true --facet-enabled false
-aws cloudsearch define-index-field --domain-name "$DOMAIN" --name url --type literal --sort-enabled false --facet-enabled false
-
-

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-cloudsearch/ivy.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-cloudsearch/ivy.xml b/src/plugin/indexer-cloudsearch/ivy.xml
deleted file mode 100644
index 00d9fc3..0000000
--- a/src/plugin/indexer-cloudsearch/ivy.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-<?xml version="1.0" ?>
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<ivy-module version="1.0">
-  <info organisation="org.apache.nutch" module="${ant.project.name}">
-    <license name="Apache 2.0"/>
-    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
-    <description>
-        Apache Nutch
-    </description>
-  </info>
-
-  <configurations>
-    <include file="../../../ivy/ivy-configurations.xml"/>
-  </configurations>
-
-  <publications>
-    <!--get the artifact from our module name-->
-    <artifact conf="master"/>
-  </publications>
-
-  <dependencies>
-	<dependency org="com.amazonaws" name="aws-java-sdk-cloudsearch" rev="1.10.0"/>
-  </dependencies>
-  
-</ivy-module>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-cloudsearch/plugin.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-cloudsearch/plugin.xml b/src/plugin/indexer-cloudsearch/plugin.xml
deleted file mode 100644
index 5b44253..0000000
--- a/src/plugin/indexer-cloudsearch/plugin.xml
+++ /dev/null
@@ -1,50 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-  
-  http://www.apache.org/licenses/LICENSE-2.0
-  
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<plugin id="indexer-cloudsearch" name="CloudSearchIndexWriter" version="1.0.0"
-  provider-name="nutch.apache.org">
-
-  <runtime>
-    <library name="indexer-cloudsearch.jar">
-      <export name="*" />
-    </library>
-
-     <library name="aws-java-sdk-cloudsearch-1.10.0.jar"/>
-     <library name="aws-java-sdk-core-1.10.0.jar"/>
-     <library name="commons-codec-1.6.jar"/>
-     <library name="commons-logging-1.1.3.jar"/>
-     <library name="httpclient-4.3.6.jar"/>
-     <library name="httpcore-4.3.3.jar"/>
-     <library name="jackson-annotations-2.5.0.jar"/>
-     <library name="jackson-core-2.5.3.jar"/>
-     <library name="jackson-databind-2.5.3.jar"/>
-     <library name="joda-time-2.8.jar"/>
-
-  </runtime>
-
-  <requires>
-    <import plugin="nutch-extensionpoints" />
-  </requires>
-
-  <extension id="org.apache.nutch.indexer.cloudsearch"
-    name="CloudSearch Index Writer"
-    point="org.apache.nutch.indexer.IndexWriter">
-    <implementation id="CloudSearchIndexWriter"
-      class="org.apache.nutch.indexwriter.cloudsearch.CloudSearchIndexWriter" />
-  </extension>
-
-</plugin>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchConstants.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchConstants.java b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchConstants.java
deleted file mode 100644
index 8bfb161..0000000
--- a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchConstants.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.indexwriter.cloudsearch;
-
-/**
- * Names of the {@code cloudsearch.*} configuration properties.
- */
-public interface CloudSearchConstants {
-  /** Prefix shared by every property name declared here. */
-  String CLOUDSEARCH_PREFIX = "cloudsearch.";
-
-  /** Property holding the document endpoint of the CloudSearch domain. */
-  String ENDPOINT = CLOUDSEARCH_PREFIX + "endpoint";
-
-  /** Property holding the name of the CloudSearch region. */
-  String REGION = CLOUDSEARCH_PREFIX + "region";
-
-  /** Boolean property: dump batches to local temp files. */
-  String BATCH_DUMP = CLOUDSEARCH_PREFIX + "batch.dump";
-
-  /** Integer property: maximum number of documents per batch. */
-  String MAX_DOCS_BATCH = CLOUDSEARCH_PREFIX + "batch.maxSize";
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java
deleted file mode 100644
index b6f1a9c..0000000
--- a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java
+++ /dev/null
@@ -1,382 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.indexwriter.cloudsearch;
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.nutch.indexer.IndexWriter;
-import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.NutchField;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.amazonaws.regions.RegionUtils;
-import com.amazonaws.services.cloudsearchdomain.AmazonCloudSearchDomainClient;
-import com.amazonaws.services.cloudsearchdomain.model.ContentType;
-import com.amazonaws.services.cloudsearchdomain.model.UploadDocumentsRequest;
-import com.amazonaws.services.cloudsearchdomain.model.UploadDocumentsResult;
-import com.amazonaws.services.cloudsearchv2.AmazonCloudSearchClient;
-import com.amazonaws.services.cloudsearchv2.model.DescribeDomainsRequest;
-import com.amazonaws.services.cloudsearchv2.model.DescribeDomainsResult;
-import com.amazonaws.services.cloudsearchv2.model.DescribeIndexFieldsRequest;
-import com.amazonaws.services.cloudsearchv2.model.DescribeIndexFieldsResult;
-import com.amazonaws.services.cloudsearchv2.model.DomainStatus;
-import com.amazonaws.services.cloudsearchv2.model.IndexFieldStatus;
-import com.amazonaws.util.json.JSONException;
-import com.amazonaws.util.json.JSONObject;
-
-/**
- * Writes documents to CloudSearch.
- */
-public class CloudSearchIndexWriter implements IndexWriter {
-  public static final Logger LOG = LoggerFactory
-      .getLogger(CloudSearchIndexWriter.class);
-
-  /** Largest batch, in UTF-8 bytes, that is accumulated before flushing. */
-  private static final int MAX_SIZE_BATCH_BYTES = 5242880;
-  /** Largest single document, in UTF-8 bytes, that is accepted. */
-  private static final int MAX_SIZE_DOC_BYTES = 1048576;
-
-  /**
-   * Formatter applied to {@link Date} field values. The pattern ends with a
-   * literal 'Z' (the UTC designator), so the formatter is pinned to UTC;
-   * with the JVM default time zone the emitted timestamps would be shifted on
-   * any host that does not run in UTC.
-   */
-  private static final SimpleDateFormat DATE_FORMAT = createUtcDateFormat();
-
-  /** Client used to upload batches; stays null when only dumping to files. */
-  private AmazonCloudSearchDomainClient client;
-
-  /** Maximum number of documents per batch; values &lt;= 0 disable the limit. */
-  private int maxDocsInBatch = -1;
-
-  /** JSON array of the operations queued for the current batch. */
-  private StringBuffer buffer;
-
-  /** Number of operations currently held in {@link #buffer}. */
-  private int numDocsInBatch = 0;
-
-  /** When true, batches are written to local temp files instead of uploaded. */
-  private boolean dumpBatchFilesToTemp = false;
-
-  private Configuration conf;
-
-  /** Index field name to index field type, as described by the domain. */
-  private Map<String, String> csfields = new HashMap<String, String>();
-
-  /** Region name read from the configuration; may be blank. */
-  private String regionName;
-
-  /** Builds the formatter for {@link #DATE_FORMAT}, fixed to the UTC zone. */
-  private static SimpleDateFormat createUtcDateFormat() {
-    SimpleDateFormat format = new SimpleDateFormat(
-        "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
-    format.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
-    return format;
-  }
-
-  /**
-   * Prepares the writer for a job: reads the batch settings, starts an empty
-   * batch and, unless batches are only dumped to local files, looks up the
-   * domain that owns the configured endpoint, records its index fields and
-   * creates the client used for uploads.
-   *
-   * @param job
-   *          job configuration providing the {@code cloudsearch.*} properties
-   * @param name
-   *          only used for debug logging
-   * @throws IOException
-   *           declared by the interface
-   * @throws RuntimeException
-   *           if the endpoint is not set or matches no domain
-   */
-  @Override
-  public void open(JobConf job, String name) throws IOException {
-    LOG.debug("CloudSearchIndexWriter.open() name={} ", name);
-
-    maxDocsInBatch = job.getInt(CloudSearchConstants.MAX_DOCS_BATCH, -1);
-
-    buffer = new StringBuffer(MAX_SIZE_BATCH_BYTES).append('[');
-
-    dumpBatchFilesToTemp = job.getBoolean(CloudSearchConstants.BATCH_DUMP,
-        false);
-
-    if (dumpBatchFilesToTemp) {
-      // only dumping to local file
-      // no more config required
-      return;
-    }
-
-    String endpoint = job.get(CloudSearchConstants.ENDPOINT);
-
-    if (StringUtils.isBlank(endpoint)) {
-      throw new RuntimeException("endpoint not set for CloudSearch");
-    }
-
-    // This client is only needed to describe the domain, so it is released
-    // again before the method returns.
-    AmazonCloudSearchClient cl = new AmazonCloudSearchClient();
-    try {
-      if (StringUtils.isNotBlank(regionName)) {
-        cl.setRegion(RegionUtils.getRegion(regionName));
-      }
-
-      String domainName = null;
-
-      // retrieve the domain name
-      DescribeDomainsResult domains = cl
-          .describeDomains(new DescribeDomainsRequest());
-
-      for (DomainStatus ds : domains.getDomainStatusList()) {
-        // endpoint is known to be non-blank here; comparing from its side
-        // tolerates domains that report no doc service or endpoint yet
-        if (ds.getDocService() != null
-            && endpoint.equals(ds.getDocService().getEndpoint())) {
-          domainName = ds.getDomainName();
-          break;
-        }
-      }
-
-      // check domain name
-      if (StringUtils.isBlank(domainName)) {
-        throw new RuntimeException(
-            "No domain name found for CloudSearch endpoint");
-      }
-
-      DescribeIndexFieldsResult indexDescription = cl.describeIndexFields(
-          new DescribeIndexFieldsRequest().withDomainName(domainName));
-      for (IndexFieldStatus ifs : indexDescription.getIndexFields()) {
-        String indexname = ifs.getOptions().getIndexFieldName();
-        String indextype = ifs.getOptions().getIndexFieldType();
-        LOG.info("CloudSearch index name {} of type {}", indexname, indextype);
-        csfields.put(indexname, indextype);
-      }
-    } finally {
-      cl.shutdown();
-    }
-
-    client = new AmazonCloudSearchDomainClient();
-    client.setEndpoint(endpoint);
-
-  }
-
-  /**
-   * Queues a "delete" operation for the document identified by the given URL.
-   * A failure to build the JSON is logged and the operation is dropped.
-   *
-   * @param url
-   *          URL of the document; its id is derived from it
-   * @throws IOException
-   *           if adding to the batch fails
-   */
-  @Override
-  public void delete(String url) throws IOException {
-    try {
-      JSONObject deletion = new JSONObject();
-      deletion.put("type", "delete");
-      // the document id is generated from the url
-      deletion.put("id", CloudSearchUtils.getID(url));
-
-      String json = deletion.toString(2);
-      addToBatch(json, url);
-    } catch (JSONException e) {
-      LOG.error("Exception caught while building JSON object", e);
-    }
-  }
-
-  /**
-   * Updates a document by delegating to {@link #write(NutchDocument)}.
-   *
-   * @param doc
-   *          the document to send
-   * @throws IOException
-   *           if writing fails
-   */
-  @Override
-  public void update(NutchDocument doc) throws IOException {
-    write(doc);
-  }
-
-  @Override
-  public void write(NutchDocument doc) throws IOException {
-    try {
-      JSONObject doc_builder = new JSONObject();
-
-      doc_builder.put("type", "add");
-
-      String url = doc.getField("url").toString();
-
-      // generate the id from the url
-      String ID = CloudSearchUtils.getID(url);
-      doc_builder.put("id", ID);
-
-      JSONObject fields = new JSONObject();
-
-      for (final Entry<String, NutchField> e : doc) {
-        String fieldname = cleanFieldName(e.getKey());
-        String type = csfields.get(fieldname);
-
-        // undefined in index
-        if (!dumpBatchFilesToTemp && type == null) {
-          LOG.info(
-              "Field {} not defined in CloudSearch domain for {} - skipping.",
-              fieldname, url);
-          continue;
-        }
-
-        List<Object> values = e.getValue().getValues();
-        // write the values
-        for (Object value : values) {
-          // Convert dates to an integer
-          if (value instanceof Date) {
-            Date d = (Date) value;
-            value = DATE_FORMAT.format(d);
-          }
-          // normalise strings
-          else if (value instanceof String) {
-            value = CloudSearchUtils.stripNonCharCodepoints((String) value);
-          }
-
-          fields.accumulate(fieldname, value);
-        }
-      }
-
-      doc_builder.put("fields", fields);
-
-      addToBatch(doc_builder.toString(2), url);
-
-    } catch (JSONException e) {
-      LOG.error("Exception caught while building JSON object", e);
-    }
-  }
-
-  /**
-   * Appends one JSON operation to the current batch. Operations larger than
-   * the per-document limit are logged and dropped. The batch is committed
-   * first when the operation would not fit, and committed afterwards when the
-   * configured maximum number of documents has been reached.
-   *
-   * @param currentDoc
-   *          JSON text of the operation
-   * @param url
-   *          URL of the document, used for logging only
-   * @throws IOException
-   *           if committing fails
-   */
-  private void addToBatch(String currentDoc, String url) throws IOException {
-    int currentDocLength = currentDoc.getBytes(StandardCharsets.UTF_8).length;
-
-    // an oversized doc is skipped
-    if (currentDocLength > MAX_SIZE_DOC_BYTES) {
-      LOG.error("Doc too large. currentDoc.length {} : {}", currentDocLength,
-          url);
-      return;
-    }
-
-    byte[] bufferedBytes = buffer.toString().getBytes(StandardCharsets.UTF_8);
-    int currentBufferLength = bufferedBytes.length;
-
-    LOG.debug("currentDoc.length {}, buffer length {}", currentDocLength,
-        currentBufferLength);
-
-    // the extra 2 bytes leave room for a separator and the closing bracket
-    boolean fitsInCurrentBatch = currentDocLength + 2
-        + currentBufferLength < MAX_SIZE_BATCH_BYTES;
-
-    if (!fitsInCurrentBatch) {
-      // flush the previous batch; this doc starts the new one
-      commit();
-    } else if (numDocsInBatch != 0) {
-      buffer.append(',');
-    }
-    buffer.append(currentDoc);
-    numDocsInBatch++;
-
-    // has the max number of docs in a batch been reached with this doc?
-    boolean limited = maxDocsInBatch > 0;
-    if (limited && numDocsInBatch == maxDocsInBatch) {
-      commit();
-    }
-  }
-
-  @Override
-  public void commit() throws IOException {
-
-    // nothing to do
-    if (numDocsInBatch == 0) {
-      return;
-    }
-
-    // close the array
-    buffer.append(']');
-
-    LOG.info("Sending {} docs to CloudSearch", numDocsInBatch);
-
-    byte[] bb = buffer.toString().getBytes(StandardCharsets.UTF_8);
-
-    if (dumpBatchFilesToTemp) {
-      try {
-        File temp = File.createTempFile("CloudSearch_", ".json");
-        FileUtils.writeByteArrayToFile(temp, bb);
-        LOG.info("Wrote batch file {}", temp.getName());
-      } catch (IOException e1) {
-        LOG.error("Exception while generating batch file", e1);
-      } finally {
-        // reset buffer and doc counter
-        buffer = new StringBuffer(MAX_SIZE_BATCH_BYTES).append('[');
-        numDocsInBatch = 0;
-      }
-      return;
-    }
-    // not in debug mode
-    try (InputStream inputStream = new ByteArrayInputStream(bb)) {
-      UploadDocumentsRequest batch = new UploadDocumentsRequest();
-      batch.setContentLength((long) bb.length);
-      batch.setContentType(ContentType.Applicationjson);
-      batch.setDocuments(inputStream);
-      UploadDocumentsResult result = client.uploadDocuments(batch);
-    } catch (Exception e) {
-      LOG.error("Exception while sending batch", e);
-      LOG.error(buffer.toString());
-    } finally {
-      // reset buffer and doc counter
-      buffer = new StringBuffer(MAX_SIZE_BATCH_BYTES).append('[');
-      numDocsInBatch = 0;
-    }
-  }
-
-  /**
-   * Commits any documents still held in the batch and then shuts the upload
-   * client down, if one was created.
-   *
-   * @throws IOException
-   *           if committing fails
-   */
-  @Override
-  public void close() throws IOException {
-    // flush whatever has not been sent yet
-    commit();
-
-    if (client == null) {
-      return;
-    }
-    client.shutdown();
-  }
-
-  /**
-   * Returns the configuration last passed to {@code setConf}.
-   *
-   * @return the stored configuration, or null if none has been set
-   */
-  public Configuration getConf() {
-    return this.conf;
-  }
-
-  /**
-   * Stores the configuration, reads the region name from it and checks that
-   * an endpoint is configured unless batches are only dumped to local files.
-   *
-   * @param conf
-   *          configuration providing the {@code cloudsearch.*} properties
-   * @throws RuntimeException
-   *           if no endpoint is set and batch dumping is disabled
-   */
-  @Override
-  public void setConf(Configuration conf) {
-    this.conf = conf;
-    String endpoint = getConf().get(CloudSearchConstants.ENDPOINT);
-    // Local on purpose, and named differently from the dumpBatchFilesToTemp
-    // field so that it no longer shadows it: this value is only used for the
-    // check below.
-    boolean dumpOnly = getConf().getBoolean(CloudSearchConstants.BATCH_DUMP,
-        false);
-    this.regionName = getConf().get(CloudSearchConstants.REGION);
-
-    if (StringUtils.isBlank(endpoint) && !dumpOnly) {
-      String message = "Missing CloudSearch endpoint. Should set it via -D "
-          + CloudSearchConstants.ENDPOINT + " or in nutch-site.xml";
-      message += "\n" + describe();
-      LOG.error(message);
-      throw new RuntimeException(message);
-    }
-  }
-
-  /**
-   * Builds a description of the endpoint and region properties, each followed
-   * by its configured value when one is set.
-   *
-   * @return the multi-line description
-   */
-  public String describe() {
-    // get the values set in the conf, if there is one
-    Configuration current = getConf();
-    String configuredEndpoint = current == null ? null
-        : current.get(CloudSearchConstants.ENDPOINT);
-    String configuredRegion = current == null ? null
-        : current.get(CloudSearchConstants.REGION);
-
-    StringBuilder text = new StringBuilder("CloudSearchIndexWriter\n");
-
-    text.append("\t");
-    text.append(CloudSearchConstants.ENDPOINT);
-    text.append(" : URL of the CloudSearch domain's document endpoint.");
-    if (StringUtils.isNotBlank(configuredEndpoint)) {
-      text.append(" (value: ");
-      text.append(configuredEndpoint);
-      text.append(")");
-    }
-    text.append("\n");
-
-    text.append("\t");
-    text.append(CloudSearchConstants.REGION);
-    text.append(" : name of the CloudSearch region.");
-    if (StringUtils.isNotBlank(configuredRegion)) {
-      text.append(" (");
-      text.append(configuredRegion);
-      text.append(")");
-    }
-    text.append("\n");
-
-    return text.toString();
-  }
-
-  /**
-   * Remove the non-cloudSearch-legal characters: the name is lower-cased and
-   * every character other than a-z, 0-9 and '_' is replaced by '_'. Note that
-   * this might convert two fields to the same name.
-   * <p>
-   * Lower-casing uses the root locale so that the result does not depend on
-   * the default locale of the JVM; with a Turkish default locale, for example,
-   * "ID" would otherwise become "_d" instead of "id".
-   * 
-   * @param name
-   *          the original field name
-   * @return the cleaned field name
-   */
-  String cleanFieldName(String name) {
-    String lowercase = name.toLowerCase(java.util.Locale.ROOT);
-    return lowercase.replaceAll("[^a-z_0-9]", "_");
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchUtils.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchUtils.java b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchUtils.java
deleted file mode 100644
index 5783981..0000000
--- a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchUtils.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.indexwriter.cloudsearch;
-
-import java.nio.charset.StandardCharsets;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-
-import org.apache.commons.codec.binary.Hex;
-
-/**
- * Static helpers used when preparing documents for Amazon CloudSearch:
- * derivation of document IDs and removal of characters that the code
- * treats as illegal for CloudSearch.
- */
-public class CloudSearchUtils {
-
-  /** Digest algorithm used to derive document IDs from URLs. */
-  private static final String DIGEST_ALGORITHM = "SHA-512";
-
-  static {
-    // Fail at class-load time if the algorithm is unavailable, rather than
-    // on the first call to getID().
-    newDigester();
-  }
-
-  /**
-   * Creates a fresh digest instance. MessageDigest objects are stateful and
-   * not safe for concurrent use, so one is created per call instead of
-   * sharing a single static instance between threads.
-   */
-  private static MessageDigest newDigester() {
-    try {
-      return MessageDigest.getInstance(DIGEST_ALGORITHM);
-    } catch (NoSuchAlgorithmException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  /**
-   * Returns a normalised doc ID based on the URL of a document.
-   *
-   * @param url
-   *          the document URL, must not be null
-   * @return the hex-encoded SHA-512 digest of the UTF-8 bytes of the URL
-   * @throws RuntimeException
-   *           if the generated ID is longer than 128 characters
-   */
-  public static String getID(String url) {
-
-    // the document needs an ID
-    // @see
-    // http://docs.aws.amazon.com/cloudsearch/latest/developerguide/preparing-data.html#creating-document-batches
-    // A unique ID for the document. A document ID can contain any
-    // letter or number and the following characters: _ - = # ; : / ? @
-    // &. Document IDs must be at least 1 and no more than 128
-    // characters long.
-    byte[] dig = newDigester().digest(url.getBytes(StandardCharsets.UTF_8));
-    String id = Hex.encodeHexString(dig);
-    // is that even possible?
-    if (id.length() > 128) {
-      throw new RuntimeException("ID larger than max 128 chars");
-    }
-    return id;
-  }
-
-  /**
-   * Removes every UTF-16 code unit that is not tab, line feed, carriage
-   * return or in the range U+0020..U+FFFD.
-   *
-   * @param input
-   *          the text to filter, must not be null
-   * @return the input without the rejected characters
-   */
-  public static String stripNonCharCodepoints(String input) {
-    StringBuilder retval = new StringBuilder(input.length());
-
-    for (int i = 0; i < input.length(); i++) {
-      char ch = input.charAt(i);
-
-      // Keep only characters that are legal for CloudSearch
-      if ((ch == 0x9 || ch == 0xa || ch == 0xd)
-          || (ch >= 0x20 && ch <= 0xFFFD)) {
-        retval.append(ch);
-      }
-    }
-
-    return retval.toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-dummy/build.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-dummy/build.xml b/src/plugin/indexer-dummy/build.xml
deleted file mode 100644
index d941278..0000000
--- a/src/plugin/indexer-dummy/build.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project name="indexer-dummy" default="jar-core">
-
-  <import file="../build-plugin.xml" />
-
-</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-dummy/ivy.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-dummy/ivy.xml b/src/plugin/indexer-dummy/ivy.xml
deleted file mode 100644
index 1a86d68..0000000
--- a/src/plugin/indexer-dummy/ivy.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-<?xml version="1.0" ?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<ivy-module version="1.0">
-  <info organisation="org.apache.nutch" module="${ant.project.name}">
-    <license name="Apache 2.0"/>
-    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
-    <description>
-        Apache Nutch
-    </description>
-  </info>
-
-  <configurations>
-    <include file="../../..//ivy/ivy-configurations.xml"/>
-  </configurations>
-
-  <publications>
-    <!--get the artifact from our module name-->
-    <artifact conf="master"/>
-  </publications>
-
-  <dependencies>
-  </dependencies>
-  
-</ivy-module>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-dummy/plugin.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-dummy/plugin.xml b/src/plugin/indexer-dummy/plugin.xml
deleted file mode 100644
index 963c66a..0000000
--- a/src/plugin/indexer-dummy/plugin.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-  
-  http://www.apache.org/licenses/LICENSE-2.0
-  
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<plugin id="indexer-dummy" name="DummyIndexWriter" version="1.0.0"
-  provider-name="nutch.apache.org">
-
-  <runtime>
-    <library name="indexer-dummy.jar">
-      <export name="*" />
-    </library>
-  </runtime>
-
-  <requires>
-    <import plugin="nutch-extensionpoints" />
-  </requires>
-
-  <extension id="org.apache.nutch.indexer.dummy"
-    name="Dummy Index Writer"
-    point="org.apache.nutch.indexer.IndexWriter">
-    <implementation id="DummyIndexWriter"
-      class="org.apache.nutch.indexwriter.dummy.DummyIndexWriter" />
-  </extension>
-
-</plugin>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java b/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java
deleted file mode 100644
index b27ba14..0000000
--- a/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.indexwriter.dummy;
-
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.FileWriter;
-import java.io.Writer;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.nutch.indexer.IndexWriter;
-import org.apache.nutch.indexer.IndexerMapReduce;
-import org.apache.nutch.indexer.NutchDocument;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * DummyIndexWriter. This pluggable indexer writes
- * {@code <action>\t<url>\n} lines to a plain text file for debugging
- * purposes. Possible actions are delete, update and add; every call to
- * {@link #commit()} additionally writes a {@code commit} line.
- * <p>
- * The output file is taken from the {@code dummy.path} configuration
- * property and is opened as soon as the configuration is set.
- */
-public class DummyIndexWriter implements IndexWriter {
-  public static final Logger LOG = LoggerFactory
-      .getLogger(DummyIndexWriter.class);
-  private Configuration config;
-  private Writer writer;
-  private boolean delete = false;
-
-  /**
-   * Reads from the job whether deletions should be recorded.
-   */
-  @Override
-  public void open(JobConf job, String name) throws IOException {
-    delete = job.getBoolean(IndexerMapReduce.INDEXER_DELETE, false);
-  }
-
-  /**
-   * Records a deletion, but only if deletions were enabled in
-   * {@link #open(JobConf, String)}.
-   */
-  @Override
-  public void delete(String key) throws IOException {
-    if (delete) {
-      writer.write("delete\t" + key + "\n");
-    }
-  }
-
-  @Override
-  public void update(NutchDocument doc) throws IOException {
-    writer.write("update\t" + doc.getFieldValue("id") + "\n");
-  }
-
-  @Override
-  public void write(NutchDocument doc) throws IOException {
-    writer.write("add\t" + doc.getFieldValue("id") + "\n");
-  }
-
-  /**
-   * Flushes and closes the output file. Does nothing if the file was never
-   * opened.
-   */
-  @Override
-  public void close() throws IOException {
-    if (writer == null) {
-      return;
-    }
-    try {
-      writer.flush();
-    } finally {
-      // Release the file handle even if the final flush fails.
-      writer.close();
-    }
-  }
-
-  @Override
-  public void commit() throws IOException {
-    writer.write("commit\n");
-  }
-
-  @Override
-  public Configuration getConf() {
-    return config;
-  }
-
-  /**
-   * Stores the configuration and opens the file named by
-   * {@code dummy.path} for writing.
-   *
-   * @throws RuntimeException
-   *           if {@code dummy.path} is not set or the file cannot be opened
-   */
-  @Override
-  public void setConf(Configuration conf) {
-    config = conf;
-    String path = conf.get("dummy.path");
-    if (path == null) {
-      String message = "Missing path. Should be set via -Ddummy.path";
-      message += "\n" + describe();
-      LOG.error(message);
-      throw new RuntimeException(message);
-    }
-
-    try {
-      writer = new BufferedWriter(new FileWriter(path));
-    } catch (IOException e) {
-      // Report the failure here: continuing with a null writer would only
-      // surface later as a NullPointerException on the first write.
-      String message = "Cannot open file for writing: " + path;
-      LOG.error(message, e);
-      throw new RuntimeException(message, e);
-    }
-  }
-
-  /**
-   * @return a human-readable description of the supported properties
-   */
-  @Override
-  public String describe() {
-    StringBuilder sb = new StringBuilder("DummyIndexWriter\n");
-    sb.append("\t").append(
-        "dummy.path : Path of the file to write to (mandatory)\n");
-    return sb.toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java b/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java
deleted file mode 100644
index 8cc00c4..0000000
--- a/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Index writer plugin for debugging, writes pairs of &lt;action, url&gt; to a
- * text file, action is one of "add", "update", or "delete".
- */
-package org.apache.nutch.indexwriter.dummy;
-

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-elastic/build-ivy.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-elastic/build-ivy.xml b/src/plugin/indexer-elastic/build-ivy.xml
deleted file mode 100644
index 96f336c..0000000
--- a/src/plugin/indexer-elastic/build-ivy.xml
+++ /dev/null
@@ -1,54 +0,0 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project name="indexer-elastic" default="deps-jar" xmlns:ivy="antlib:org.apache.ivy.ant">
-
-    <property name="ivy.install.version" value="2.1.0" />
-    <condition property="ivy.home" value="${env.IVY_HOME}">
-      <isset property="env.IVY_HOME" />
-    </condition>
-    <property name="ivy.home" value="${user.home}/.ant" />
-    <property name="ivy.checksums" value="" />
-    <property name="ivy.jar.dir" value="${ivy.home}/lib" />
-    <property name="ivy.jar.file" value="${ivy.jar.dir}/ivy.jar" />
-
-    <target name="download-ivy" unless="offline">
-
-        <mkdir dir="${ivy.jar.dir}"/>
-        <!-- download Ivy from Maven Central so that it can be used even without any special installation;
-             HTTPS is used because Maven Central no longer serves plain HTTP and the repo2 host has been retired -->
-        <get src="https://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/ivy-${ivy.install.version}.jar" 
-             dest="${ivy.jar.file}" usetimestamp="true"/>
-    </target>
-
-    <target name="init-ivy" depends="download-ivy">
-      <!-- try to load ivy here from ivy home, in case the user has not already dropped
-              it into ant's lib dir (note that the latter copy will always take precedence).
-              We will not fail as long as local lib dir exists (it may be empty) and
-              ivy is in at least one of ant's lib dir or the local lib dir. -->
-        <path id="ivy.lib.path">
-            <fileset dir="${ivy.jar.dir}" includes="*.jar"/>
-
-        </path>
-        <taskdef resource="org/apache/ivy/ant/antlib.xml"
-                 uri="antlib:org.apache.ivy.ant" classpathref="ivy.lib.path"/>
-    </target>
-
-  <target name="deps-jar" depends="init-ivy">
-    <ivy:retrieve pattern="lib/[artifact]-[revision].[ext]"/>
-  </target>
-
-</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-elastic/build.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-elastic/build.xml b/src/plugin/indexer-elastic/build.xml
deleted file mode 100644
index 38955ff..0000000
--- a/src/plugin/indexer-elastic/build.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project name="indexer-elastic" default="jar-core">
-
-  <import file="../build-plugin.xml" />
-
-</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-elastic/howto_upgrade_es.txt
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-elastic/howto_upgrade_es.txt b/src/plugin/indexer-elastic/howto_upgrade_es.txt
deleted file mode 100644
index b577053..0000000
--- a/src/plugin/indexer-elastic/howto_upgrade_es.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-1. Upgrade elasticsearch dependency in src/plugin/indexer-elastic/ivy.xml
-
-2. Upgrade the Elasticsearch specific dependencies in src/plugin/indexer-elastic/plugin.xml
-   To get the list of dependencies and their versions execute:
-   $ ant -f ./build-ivy.xml
-   $ ls lib/

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-elastic/ivy.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-elastic/ivy.xml b/src/plugin/indexer-elastic/ivy.xml
deleted file mode 100644
index f34075f..0000000
--- a/src/plugin/indexer-elastic/ivy.xml
+++ /dev/null
@@ -1,43 +0,0 @@
-<?xml version="1.0" ?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<ivy-module version="1.0">
-  <info organisation="org.apache.nutch" module="${ant.project.name}">
-    <license name="Apache 2.0"/>
-    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
-    <description>
-        Apache Nutch
-    </description>
-  </info>
-
-  <configurations>
-    <include file="../../..//ivy/ivy-configurations.xml"/>
-  </configurations>
-
-  <publications>
-    <!--get the artifact from our module name-->
-    <artifact conf="master"/>
-  </publications>
-
-  <dependencies>
-        <dependency org="org.elasticsearch" name="elasticsearch" rev="2.3.3"
-                    conf="*->default"/>
-  </dependencies>
-  
-</ivy-module>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-elastic/plugin.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-elastic/plugin.xml b/src/plugin/indexer-elastic/plugin.xml
deleted file mode 100644
index d99a665..0000000
--- a/src/plugin/indexer-elastic/plugin.xml
+++ /dev/null
@@ -1,71 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-  
-  http://www.apache.org/licenses/LICENSE-2.0
-  
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<plugin id="indexer-elastic" name="ElasticIndexWriter" version="1.0.0"
-  provider-name="nutch.apache.org">
-
-  <runtime>
-    <!-- the plugin's own jar, exported to dependent plugins -->
-    <library name="indexer-elastic.jar">
-      <export name="*" />
-    </library>
-    <!-- Elasticsearch and its dependencies; the plugin's own jar is
-         declared only once, above -->
-    <library name="elasticsearch-2.3.3.jar"/>
-    <library name="commons-cli-1.3.1.jar"/>
-    <library name="compress-lzf-1.0.2.jar"/>
-    <library name="guava-18.0.jar"/>
-    <library name="HdrHistogram-2.1.6.jar"/>
-    <library name="hppc-0.7.1.jar"/>
-    <library name="jackson-core-2.6.6.jar"/>
-    <library name="jackson-dataformat-cbor-2.6.6.jar"/>
-    <library name="jackson-dataformat-smile-2.6.6.jar"/>
-    <library name="jackson-dataformat-yaml-2.6.6.jar"/>
-    <library name="joda-convert-1.2.jar"/>
-    <library name="joda-time-2.8.2.jar"/>
-    <library name="jsr166e-1.1.0.jar"/>
-    <library name="lucene-analyzers-common-5.5.0.jar"/>
-    <library name="lucene-backward-codecs-5.5.0.jar"/>
-    <library name="lucene-core-5.5.0.jar"/>
-    <library name="lucene-grouping-5.5.0.jar"/>
-    <library name="lucene-highlighter-5.5.0.jar"/>
-    <library name="lucene-join-5.5.0.jar"/>
-    <library name="lucene-memory-5.5.0.jar"/>
-    <library name="lucene-misc-5.5.0.jar"/>
-    <library name="lucene-queries-5.5.0.jar"/>
-    <library name="lucene-queryparser-5.5.0.jar"/>
-    <library name="lucene-sandbox-5.5.0.jar"/>
-    <library name="lucene-spatial-5.5.0.jar"/>
-    <library name="lucene-spatial3d-5.5.0.jar"/>
-    <library name="lucene-suggest-5.5.0.jar"/>
-    <library name="netty-3.10.5.Final.jar"/>
-    <library name="securesm-1.0.jar"/>
-    <library name="snakeyaml-1.15.jar"/>
-    <library name="spatial4j-0.5.jar"/>
-    <library name="t-digest-3.0.jar"/>
-  </runtime>
-
-  <requires>
-    <import plugin="nutch-extensionpoints" />
-  </requires>
-
-  <extension id="org.apache.nutch.indexer.elastic"
-    name="Elasticsearch Index Writer"
-    point="org.apache.nutch.indexer.IndexWriter">
-    <implementation id="ElasticIndexWriter"
-      class="org.apache.nutch.indexwriter.elastic.ElasticIndexWriter" />
-  </extension>
-
-</plugin>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticConstants.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticConstants.java b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticConstants.java
deleted file mode 100644
index b0e70c8..0000000
--- a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticConstants.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.indexwriter.elastic;
-
-/**
- * Names of the configuration properties read by the Elasticsearch index
- * writer. Every property name starts with {@link #ELASTIC_PREFIX}.
- */
-public interface ElasticConstants {
-  /** Prefix shared by all property names declared here. */
-  String ELASTIC_PREFIX = "elastic.";
-
-  /** Property holding the Elasticsearch host name. */
-  String HOST = ELASTIC_PREFIX + "host";
-
-  /** Property holding the Elasticsearch port. */
-  String PORT = ELASTIC_PREFIX + "port";
-
-  /** Property holding the cluster name. */
-  String CLUSTER = ELASTIC_PREFIX + "cluster";
-
-  /** Property holding the name of the index to write to. */
-  String INDEX = ELASTIC_PREFIX + "index";
-
-  /** Property limiting the number of documents per bulk request. */
-  String MAX_BULK_DOCS = ELASTIC_PREFIX + "max.bulk.docs";
-
-  /** Property limiting the accumulated length of a bulk request. */
-  String MAX_BULK_LENGTH = ELASTIC_PREFIX + "max.bulk.size";
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
deleted file mode 100644
index 9367e41..0000000
--- a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.indexwriter.elastic;
-
-import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.nutch.indexer.IndexWriter;
-import org.apache.nutch.indexer.NutchDocument;
-import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.action.ListenableActionFuture;
-import org.elasticsearch.action.bulk.BulkItemResponse;
-import org.elasticsearch.action.bulk.BulkRequestBuilder;
-import org.elasticsearch.action.bulk.BulkResponse;
-import org.elasticsearch.action.delete.DeleteRequestBuilder;
-import org.elasticsearch.action.index.IndexRequestBuilder;
-import org.elasticsearch.client.Client;
-import org.elasticsearch.client.transport.TransportClient;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.settings.Settings.Builder;
-import org.elasticsearch.common.transport.InetSocketTransportAddress;
-import org.elasticsearch.node.Node;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Index writer that sends Nutch documents to Elasticsearch.
- * <p>
- * Index requests are collected in a bulk request which is executed
- * asynchronously once the configured number of documents or accumulated
- * field length is reached; deletions are executed immediately. A transport
- * client is used when a host is configured, otherwise the writer joins the
- * configured cluster as a client node.
- */
-public class ElasticIndexWriter implements IndexWriter {
-  public static Logger LOG = LoggerFactory.getLogger(ElasticIndexWriter.class);
-
-  private static final int DEFAULT_MAX_BULK_DOCS = 250;
-  private static final int DEFAULT_MAX_BULK_LENGTH = 2500500;
-
-  private Client client;
-  private Node node;
-  private String defaultIndex;
-
-  private Configuration config;
-
-  private BulkRequestBuilder bulk;
-  private ListenableActionFuture<BulkResponse> execute;
-  private int port = -1;
-  private String host = null;
-  private String clusterName = null;
-  private int maxBulkDocs;
-  private int maxBulkLength;
-  private long indexedDocs = 0;
-  private int bulkDocs = 0;
-  private int bulkLength = 0;
-  private boolean createNewBulk = false;
-
-  /**
-   * Builds the client settings from {@code elasticsearch.conf} and the job
-   * configuration, connects to Elasticsearch and prepares the first bulk
-   * request.
-   *
-   * @throws IOException
-   *           if the settings file cannot be read, the host cannot be
-   *           resolved, or neither a host nor a cluster name is configured
-   */
-  @Override
-  public void open(JobConf job, String name) throws IOException {
-    clusterName = job.get(ElasticConstants.CLUSTER);
-
-    host = job.get(ElasticConstants.HOST);
-    port = job.getInt(ElasticConstants.PORT, 9300);
-
-    Builder settingsBuilder = Settings.builder();
-    readSettingsFile(job, settingsBuilder);
-
-    if (StringUtils.isNotBlank(clusterName))
-      settingsBuilder.put("cluster.name", clusterName);
-
-    // Set the cluster name and build the settings
-    Settings settings = settingsBuilder.build();
-
-    // Prefer TransportClient
-    if (host != null && port > 0) {
-      TransportClient transportClient = TransportClient.builder()
-          .settings(settings).build()
-          .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(host), port));
-      client = transportClient;
-    } else if (clusterName != null) {
-      node = nodeBuilder().settings(settings).client(true).node();
-      client = node.client();
-    } else {
-      // Without this check the failure would be a NullPointerException on
-      // the prepareBulk() call below.
-      throw new IOException("Cannot connect to Elasticsearch: neither "
-          + ElasticConstants.HOST + " nor " + ElasticConstants.CLUSTER
-          + " is set");
-    }
-
-    bulk = client.prepareBulk();
-    defaultIndex = job.get(ElasticConstants.INDEX, "nutch");
-    maxBulkDocs = job.getInt(ElasticConstants.MAX_BULK_DOCS,
-        DEFAULT_MAX_BULK_DOCS);
-    maxBulkLength = job.getInt(ElasticConstants.MAX_BULK_LENGTH,
-        DEFAULT_MAX_BULK_LENGTH);
-  }
-
-  /**
-   * Copies the {@code key=value} lines of the {@code elasticsearch.conf}
-   * resource into the settings builder. Blank lines, lines starting with
-   * {@code #} and lines without both a key and a value are ignored.
-   */
-  private static void readSettingsFile(JobConf job, Builder settingsBuilder)
-      throws IOException {
-    java.io.Reader resource = job.getConfResourceAsReader("elasticsearch.conf");
-    if (resource == null) {
-      LOG.warn("elasticsearch.conf not found, no additional settings loaded");
-      return;
-    }
-
-    // try-with-resources: the reader was previously never closed
-    try (BufferedReader reader = new BufferedReader(resource)) {
-      String line;
-      while ((line = reader.readLine()) != null) {
-        if (StringUtils.isBlank(line)) {
-          continue;
-        }
-        // Strings are immutable: the trimmed value has to be assigned,
-        // otherwise indented comment lines are not recognised.
-        line = line.trim();
-        if (line.startsWith("#")) {
-          continue;
-        }
-
-        // Split on the first '=' only so that values may contain '='
-        String[] parts = line.split("=", 2);
-        if (parts.length == 2) {
-          String key = parts[0].trim();
-          String value = parts[1].trim();
-          if (!key.isEmpty() && !value.isEmpty()) {
-            settingsBuilder.put(key, value);
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Adds an index request for the document to the current bulk request and
-   * flushes the bulk once one of the configured limits is reached.
-   */
-  @Override
-  public void write(NutchDocument doc) throws IOException {
-    if (bulk == null) {
-      // A commit() that did not ask for a new bulk leaves none behind
-      bulk = client.prepareBulk();
-      bulkDocs = 0;
-      bulkLength = 0;
-    }
-
-    String id = (String) doc.getFieldValue("id");
-    String type = doc.getDocumentMeta().get("type");
-    if (type == null)
-      type = "doc";
-    IndexRequestBuilder request = client.prepareIndex(defaultIndex, type, id);
-
-    Map<String, Object> source = new HashMap<String, Object>();
-
-    // Loop through all fields of this doc
-    for (String fieldName : doc.getFieldNames()) {
-      if (doc.getField(fieldName).getValues().size() > 1) {
-        // Index every value of a multi-valued field, not only the first
-        // one as returned by getFieldValue()
-        source.put(fieldName, doc.getField(fieldName).getValues());
-        // Loop through the values to keep track of the size of this
-        // document
-        for (Object value : doc.getField(fieldName).getValues()) {
-          if (value != null) {
-            bulkLength += value.toString().length();
-          }
-        }
-      } else {
-        Object value = doc.getFieldValue(fieldName);
-        if (value != null) {
-          source.put(fieldName, value);
-          bulkLength += value.toString().length();
-        }
-      }
-    }
-    request.setSource(source);
-
-    // Add this indexing request to a bulk request
-    bulk.add(request);
-    indexedDocs++;
-    bulkDocs++;
-
-    if (bulkDocs >= maxBulkDocs || bulkLength >= maxBulkLength) {
-      LOG.info("Processing bulk request [docs = " + bulkDocs + ", length = "
-          + bulkLength + ", total docs = " + indexedDocs
-          + ", last doc in bulk = '" + id + "']");
-      // Flush the bulk of indexing requests
-      createNewBulk = true;
-      commit();
-    }
-  }
-
-  /**
-   * Deletes the document with the given id (type {@code doc}) from the
-   * index and waits for the operation to complete.
-   */
-  @Override
-  public void delete(String key) throws IOException {
-    try {
-      DeleteRequestBuilder builder = client.prepareDelete();
-      builder.setIndex(defaultIndex);
-      builder.setType("doc");
-      builder.setId(key);
-      builder.execute().actionGet();
-    } catch (ElasticsearchException e) {
-      throw makeIOException(e);
-    }
-  }
-
-  /**
-   * Wraps an Elasticsearch exception in an {@link IOException} that has the
-   * original exception as its cause.
-   */
-  public static IOException makeIOException(ElasticsearchException e) {
-    return new IOException(e);
-  }
-
-  @Override
-  public void update(NutchDocument doc) throws IOException {
-    write(doc);
-  }
-
-  /**
-   * Waits for the previously started bulk request, if any, and then starts
-   * executing the current one asynchronously.
-   *
-   * @throws RuntimeException
-   *           if an item of the previous bulk request failed
-   */
-  @Override
-  public void commit() throws IOException {
-    if (execute != null) {
-      // wait for previous to finish
-      long beforeWait = System.currentTimeMillis();
-      BulkResponse actionGet = execute.actionGet();
-      if (actionGet.hasFailures()) {
-        for (BulkItemResponse item : actionGet) {
-          if (item.isFailed()) {
-            throw new RuntimeException("First failure in bulk: "
-                + item.getFailureMessage());
-          }
-        }
-      }
-      long msWaited = System.currentTimeMillis() - beforeWait;
-      LOG.info("Previous took in ms " + actionGet.getTookInMillis()
-          + ", including wait " + msWaited);
-      execute = null;
-    }
-    if (bulk != null) {
-      if (bulkDocs > 0) {
-        // start a flush, note that this is an asynchronous call
-        execute = bulk.execute();
-      }
-      bulk = null;
-    }
-    if (createNewBulk) {
-      // Prepare a new bulk request
-      bulk = client.prepareBulk();
-      bulkDocs = 0;
-      bulkLength = 0;
-    }
-  }
-
-  /**
-   * Flushes the pending bulk request, waits for it to finish and releases
-   * the client and, if one was started, the node. The connections are
-   * released even when the final flush fails.
-   */
-  @Override
-  public void close() throws IOException {
-    try {
-      // Flush pending requests
-      LOG.info("Processing remaining requests [docs = " + bulkDocs
-          + ", length = " + bulkLength + ", total docs = " + indexedDocs + "]");
-      createNewBulk = false;
-      commit();
-      // flush one more time to finalize the last bulk
-      LOG.info("Processing to finalize last execute");
-      commit();
-    } finally {
-      // Close
-      if (client != null) {
-        client.close();
-      }
-      if (node != null) {
-        node.close();
-      }
-    }
-  }
-
-  /**
-   * @return a human-readable description of the supported properties
-   */
-  @Override
-  public String describe() {
-    StringBuilder sb = new StringBuilder("ElasticIndexWriter\n");
-    sb.append("\t").append(ElasticConstants.CLUSTER)
-        .append(" : elastic prefix cluster\n");
-    sb.append("\t").append(ElasticConstants.HOST).append(" : hostname\n");
-    sb.append("\t").append(ElasticConstants.PORT).append(" : port\n");
-    sb.append("\t").append(ElasticConstants.INDEX)
-        .append(" : elastic index command \n");
-    sb.append("\t").append(ElasticConstants.MAX_BULK_DOCS)
-        .append(" : elastic bulk index doc counts. (default 250) \n");
-    sb.append("\t").append(ElasticConstants.MAX_BULK_LENGTH)
-        .append(" : elastic bulk index length. (default 2500500 ~2.5MB)\n");
-    return sb.toString();
-  }
-
-  /**
-   * Stores the configuration after checking that a cluster name or a host
-   * is configured.
-   *
-   * @throws RuntimeException
-   *           if both the cluster name and the host are blank
-   */
-  @Override
-  public void setConf(Configuration conf) {
-    config = conf;
-    String cluster = conf.get(ElasticConstants.CLUSTER);
-    String host = conf.get(ElasticConstants.HOST);
-
-    if (StringUtils.isBlank(cluster) && StringUtils.isBlank(host)) {
-      String message = "Missing elastic.cluster and elastic.host. At least one of them should be set in nutch-site.xml ";
-      message += "\n" + describe();
-      LOG.error(message);
-      throw new RuntimeException(message);
-    }
-  }
-
-  @Override
-  public Configuration getConf() {
-    return config;
-  }
-}

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
deleted file mode 100644
index f708334..0000000
--- a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Index writer plugin for <a href="http://www.elasticsearch.org/">Elasticsearch</a>.
- */
-package org.apache.nutch.indexwriter.elastic;
-

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-solr/build-ivy.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-solr/build-ivy.xml b/src/plugin/indexer-solr/build-ivy.xml
deleted file mode 100644
index 9832cf0..0000000
--- a/src/plugin/indexer-solr/build-ivy.xml
+++ /dev/null
@@ -1,54 +0,0 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project name="indexer-solr" default="deps-jar" xmlns:ivy="antlib:org.apache.ivy.ant">
-
-    <property name="ivy.install.version" value="2.1.0" />
-    <condition property="ivy.home" value="${env.IVY_HOME}">
-      <isset property="env.IVY_HOME" />
-    </condition>
-    <property name="ivy.home" value="${user.home}/.ant" />
-    <property name="ivy.checksums" value="" />
-    <property name="ivy.jar.dir" value="${ivy.home}/lib" />
-    <property name="ivy.jar.file" value="${ivy.jar.dir}/ivy.jar" />
-
-    <target name="download-ivy" unless="offline">
-
-        <mkdir dir="${ivy.jar.dir}"/>
-        <!-- download Ivy from web site so that it can be used even without any special installation -->
-        <get src="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/ivy-${ivy.install.version}.jar" 
-             dest="${ivy.jar.file}" usetimestamp="true"/>
-    </target>
-
-    <target name="init-ivy" depends="download-ivy">
-      <!-- try to load ivy here from ivy home, in case the user has not already dropped
-              it into ant's lib dir (note that the latter copy will always take precedence).
-              We will not fail as long as local lib dir exists (it may be empty) and
-              ivy is in at least one of ant's lib dir or the local lib dir. -->
-        <path id="ivy.lib.path">
-            <fileset dir="${ivy.jar.dir}" includes="*.jar"/>
-
-        </path>
-        <taskdef resource="org/apache/ivy/ant/antlib.xml"
-                 uri="antlib:org.apache.ivy.ant" classpathref="ivy.lib.path"/>
-    </target>
-
-  <target name="deps-jar" depends="init-ivy">
-    <ivy:retrieve pattern="lib/[artifact]-[revision].[ext]"/>
-  </target>
-
-</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-solr/build.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-solr/build.xml b/src/plugin/indexer-solr/build.xml
deleted file mode 100644
index 8d77cdf..0000000
--- a/src/plugin/indexer-solr/build.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project name="indexer-solr" default="jar-core">
-
-  <import file="../build-plugin.xml" />
-
-</project>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-solr/ivy.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-solr/ivy.xml b/src/plugin/indexer-solr/ivy.xml
deleted file mode 100644
index 65e97e7..0000000
--- a/src/plugin/indexer-solr/ivy.xml
+++ /dev/null
@@ -1,44 +0,0 @@
-<?xml version="1.0" ?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<ivy-module version="1.0">
-  <info organisation="org.apache.nutch" module="${ant.project.name}">
-    <license name="Apache 2.0"/>
-    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
-    <description>
-        Apache Nutch
-    </description>
-  </info>
-
-  <configurations>
-    <include file="../../..//ivy/ivy-configurations.xml"/>
-  </configurations>
-
-  <publications>
-    <!--get the artifact from our module name-->
-    <artifact conf="master"/>
-  </publications>
-
-  <dependencies>
-    <dependency org="org.apache.solr" name="solr-solrj" rev="5.5.0"/>
-    <dependency org="org.apache.httpcomponents" name="httpcore" rev="4.4.1" conf="*->default"/>
-    <dependency org="org.apache.httpcomponents" name="httpmime" rev="4.4.1" conf="*->default"/>
-  </dependencies>
-  
-</ivy-module>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-solr/plugin.xml
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-solr/plugin.xml b/src/plugin/indexer-solr/plugin.xml
deleted file mode 100644
index 0e86796..0000000
--- a/src/plugin/indexer-solr/plugin.xml
+++ /dev/null
@@ -1,48 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-  
-  http://www.apache.org/licenses/LICENSE-2.0
-  
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<plugin id="indexer-solr" name="SolrIndexWriter" version="1.0.0"
-  provider-name="nutch.apache.org">
-
-  <runtime>
-    <library name="indexer-solr.jar">
-      <export name="*" />
-    </library>
-      <library name="commons-io-2.4.jar"/>
-      <library name="httpclient-4.4.1.jar"/>
-      <library name="httpcore-4.4.1.jar"/>
-      <library name="httpmime-4.4.1.jar"/>
-      <library name="noggit-0.6.jar"/>
-      <library name="slf4j-api-1.7.7.jar"/>
-      <library name="solr-solrj-5.5.0.jar"/>
-      <library name="stax2-api-3.1.4.jar"/>
-      <library name="woodstox-core-asl-4.4.1.jar"/>
-      <library name="zookeeper-3.4.6.jar"/> 
-  </runtime>
-
-  <requires>
-    <import plugin="nutch-extensionpoints" />
-  </requires>
-
-  <extension id="org.apache.nutch.indexer.solr"
-    name="Solr Index Writer"
-    point="org.apache.nutch.indexer.IndexWriter">
-    <implementation id="SolrIndexWriter"
-      class="org.apache.nutch.indexwriter.solr.SolrIndexWriter" />
-  </extension>
-
-</plugin>

http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
----------------------------------------------------------------------
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
deleted file mode 100644
index 44a382e..0000000
--- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.indexwriter.solr;
-
-public interface SolrConstants {
-  public static final String SOLR_PREFIX = "solr.";
-
-  public static final String SERVER_URL = SOLR_PREFIX + "server.url";
-
-  public static final String COMMIT_SIZE = SOLR_PREFIX + "commit.size";
-
-  public static final String MAPPING_FILE = SOLR_PREFIX + "mapping.file";
-
-  public static final String USE_AUTH = SOLR_PREFIX + "auth";
-
-  public static final String USERNAME = SOLR_PREFIX + "auth.username";
-
-  public static final String PASSWORD = SOLR_PREFIX + "auth.password";
-
-  public static final String COLLECTION = SOLR_PREFIX + "collection";
-
-  public static final String ZOOKEEPER_HOSTS = SOLR_PREFIX + "zookeeper.hosts";
-
-  public static final String ID_FIELD = "id";
-
-  public static final String URL_FIELD = "url";
-
-  public static final String BOOST_FIELD = "boost";
-
-  public static final String TIMESTAMP_FIELD = "tstamp";
-
-  public static final String DIGEST_FIELD = "digest";
-
-
-
-  @Deprecated
-  public static final String COMMIT_INDEX = SOLR_PREFIX + "commit.index";
-
-  @Deprecated
-  public static final String PARAMS = SOLR_PREFIX + "params";
-
-}