Posted to dev@nutch.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2018/09/27 19:38:01 UTC

[jira] [Commented] (NUTCH-2602) Configuration values in the description of index writers

    [ https://issues.apache.org/jira/browse/NUTCH-2602?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16630951#comment-16630951 ] 

ASF GitHub Bot commented on NUTCH-2602:
---------------------------------------

r0ann3l closed pull request #356: fix for NUTCH-2602: Index writers description
URL: https://github.com/apache/nutch/pull/356

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index 06bb9197a..112975ab9 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -151,6 +151,8 @@
 		<dependency org="org.apache.httpcomponents" name="httpcore" rev="4.4.9"/>
 		<dependency org="org.apache.httpcomponents" name="httpclient" rev="4.5.5"/>
 
+		<dependency org="de.vandermeer" name="asciitable" rev="0.3.2"/>
+
 		<!--global exclusion -->
 		<exclude module="jmxtools" />
 		<exclude module="jms" />
diff --git a/src/java/org/apache/nutch/indexer/CleaningJob.java b/src/java/org/apache/nutch/indexer/CleaningJob.java
index 8a77a9d82..9b496535b 100644
--- a/src/java/org/apache/nutch/indexer/CleaningJob.java
+++ b/src/java/org/apache/nutch/indexer/CleaningJob.java
@@ -185,8 +185,6 @@ public int run(String[] args) throws IOException {
       String usage = "Usage: CleaningJob <crawldb> [-noCommit]";
       LOG.error("Missing crawldb. " + usage);
       System.err.println(usage);
-      IndexWriters writers = IndexWriters.get(getConf());
-      System.err.println(writers.describe());
       return 1;
     }
 
diff --git a/src/java/org/apache/nutch/indexer/IndexWriter.java b/src/java/org/apache/nutch/indexer/IndexWriter.java
index b33c5070d..78661599e 100644
--- a/src/java/org/apache/nutch/indexer/IndexWriter.java
+++ b/src/java/org/apache/nutch/indexer/IndexWriter.java
@@ -21,8 +21,10 @@
 import org.apache.nutch.plugin.Pluggable;
 
 import java.io.IOException;
+import java.util.Map;
 
 public interface IndexWriter extends Pluggable, Configurable {
+
   /**
    * The name of the extension point.
    */
@@ -53,9 +55,9 @@
   public void close() throws IOException;
 
   /**
-   * Returns a String describing the IndexWriter instance and the specific parameters it can take.
+   * Returns {@link Map} with the specific parameters the IndexWriter instance can take.
    *
-   * @return The full description.
+   * @return The values of each row. It must have the form <KEY,<DESCRIPTION,VALUE>>.
    */
-  public String describe();
+  Map<String, Map.Entry<String, Object>> describe();
 }
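
For context, the new contract maps each configuration key to a (description, current value) pair. Below is a minimal, self-contained sketch of an implementation; the class name, keys, and fields are hypothetical, not part of this patch:

    import java.util.AbstractMap;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class DemoDescribe {
      // Hypothetical per-instance configuration values.
      private String host = "localhost";
      private int port = 9200;

      // Each entry maps a parameter key to (description, configured value).
      public Map<String, Map.Entry<String, Object>> describe() {
        Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
        properties.put("demo.host", new AbstractMap.SimpleEntry<>(
            "Hostname to send documents to.", this.host));
        properties.put("demo.port", new AbstractMap.SimpleEntry<>(
            "Port to connect to.", this.port));
        return properties;
      }

      public static void main(String[] args) {
        new DemoDescribe().describe().forEach((key, entry) -> System.out
            .println(key + " : " + entry.getKey()
                + " (value: " + entry.getValue() + ")"));
      }
    }

A LinkedHashMap keeps insertion order, so callers see the parameters in the order the writer declares them.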
diff --git a/src/java/org/apache/nutch/indexer/IndexWriters.java b/src/java/org/apache/nutch/indexer/IndexWriters.java
index 3ac20bfea..9fac2e2fe 100644
--- a/src/java/org/apache/nutch/indexer/IndexWriters.java
+++ b/src/java/org/apache/nutch/indexer/IndexWriters.java
@@ -16,6 +16,10 @@
  */
 package org.apache.nutch.indexer;
 
+import de.vandermeer.asciitable.AT_ColumnWidthCalculator;
+import de.vandermeer.asciitable.AT_Row;
+import de.vandermeer.asciitable.AsciiTable;
+import de.vandermeer.skb.interfaces.document.TableRowType;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.exchange.Exchanges;
 import org.apache.nutch.plugin.Extension;
@@ -265,8 +269,52 @@ public String describe() {
       builder.append("Active IndexWriters :\n");
 
     for (IndexWriterWrapper indexWriterWrapper : this.indexWriters.values()) {
-      builder.append(indexWriterWrapper.getIndexWriter().describe())
-          .append("\n");
+      // Getting the class name
+      builder.append(
+          indexWriterWrapper.getIndexWriter().getClass().getSimpleName())
+          .append(":\n");
+
+      // Building the table
+      AsciiTable at = new AsciiTable();
+      at.getRenderer().setCWC((rows, colNumbers, tableWidth) -> {
+        int maxLengthFirstColumn = 0;
+        int maxLengthLastColumn = 0;
+        for (AT_Row row : rows) {
+          if (row.getType() == TableRowType.CONTENT) {
+            // First column
+            int lengthFirstColumn = row.getCells().get(0).toString().length();
+            if (lengthFirstColumn > maxLengthFirstColumn) {
+              maxLengthFirstColumn = lengthFirstColumn;
+            }
+
+            // Last column
+            int lengthLastColumn = row.getCells().get(2).toString().length();
+            if (lengthLastColumn > maxLengthLastColumn) {
+              maxLengthLastColumn = lengthLastColumn;
+            }
+          }
+        }
+        return new int[] { maxLengthFirstColumn,
+            tableWidth - maxLengthFirstColumn - maxLengthLastColumn,
+            maxLengthLastColumn };
+      });
+
+      // Getting the properties
+      Map<String, Map.Entry<String, Object>> properties = indexWriterWrapper
+          .getIndexWriter().describe();
+
+      // Adding the rows
+      properties.forEach((key, value) -> {
+        at.addRule();
+        at.addRow(key, value.getKey(),
+            value.getValue() != null ? value.getValue() : "");
+      });
+
+      // Last rule
+      at.addRule();
+
+      // Rendering the table
+      builder.append(at.render(150)).append("\n\n");
     }
 
     return builder.toString();
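
The width calculator above pins the first and last columns to their widest cell and hands the remaining width to the middle (description) column, which AsciiTable wraps. A standalone sketch of the rendering call, using the asciitable 0.3.2 dependency added in ivy.xml above (class name and row content are illustrative):

    import de.vandermeer.asciitable.AsciiTable;

    public class AsciiTableDemo {
      public static void main(String[] args) {
        AsciiTable at = new AsciiTable();
        at.addRule();
        at.addRow("some.key",
            "A longer free-form description that wraps onto several lines as needed.",
            "some value");
        at.addRule();
        // Render at a fixed overall width; with the calculator installed in
        // IndexWriters the middle column would absorb the leftover width.
        System.out.println(at.render(80));
      }
    }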
diff --git a/src/java/org/apache/nutch/indexer/IndexerOutputFormat.java b/src/java/org/apache/nutch/indexer/IndexerOutputFormat.java
index 3ce4f8061..737c91b2b 100644
--- a/src/java/org/apache/nutch/indexer/IndexerOutputFormat.java
+++ b/src/java/org/apache/nutch/indexer/IndexerOutputFormat.java
@@ -17,16 +17,22 @@
 package org.apache.nutch.indexer;
 
 import java.io.IOException;
+import java.lang.invoke.MethodHandles;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class IndexerOutputFormat
     extends FileOutputFormat<Text, NutchIndexAction> {
 
+  private static final Logger LOG = LoggerFactory
+      .getLogger(MethodHandles.lookup().lookupClass());
+
   @Override
   public RecordWriter<Text, NutchIndexAction> getRecordWriter(
       TaskAttemptContext context) throws IOException {
@@ -36,6 +42,7 @@
 
     String name = getUniqueFile(context, "part", "");
     writers.open(conf, name);
+    LOG.info(writers.describe());
 
     return new RecordWriter<Text, NutchIndexAction>() {
 
diff --git a/src/java/org/apache/nutch/indexer/IndexingJob.java b/src/java/org/apache/nutch/indexer/IndexingJob.java
index 67b7e0ba0..30c550414 100644
--- a/src/java/org/apache/nutch/indexer/IndexingJob.java
+++ b/src/java/org/apache/nutch/indexer/IndexingJob.java
@@ -120,8 +120,6 @@ public void index(Path crawlDb, Path linkDb, List<Path> segments,
         LOG.info("Indexer: adding binary content");
       }
     }
-    IndexWriters writers = IndexWriters.get(conf);
-    LOG.info(writers.describe());
 
     IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job, addBinaryContent);
 
@@ -174,8 +172,6 @@ public int run(String[] args) throws Exception {
       System.err
       //.println("Usage: Indexer <crawldb> [-linkdb <linkdb>] [-params k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit] [-deleteGone] [-filter] [-normalize]");
       .println("Usage: Indexer <crawldb> [-linkdb <linkdb>] [-params k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit] [-deleteGone] [-filter] [-normalize] [-addBinaryContent] [-base64]");
-      IndexWriters writers = IndexWriters.get(getConf());
-      System.err.println(writers.describe());
       return -1;
     }
 
diff --git a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchConstants.java b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchConstants.java
index 064a4f6ed..3c8ff5068 100644
--- a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchConstants.java
+++ b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchConstants.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.nutch.indexwriter.cloudsearch;
 
 public interface CloudSearchConstants {
diff --git a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java
index 2d72d9fa8..0d874e8e6 100644
--- a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java
+++ b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java
@@ -1,20 +1,19 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.nutch.indexwriter.cloudsearch;
 
 import java.lang.invoke.MethodHandles;
@@ -24,9 +23,11 @@
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.text.SimpleDateFormat;
+import java.util.AbstractMap;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -83,6 +84,7 @@
 
   private Map<String, String> csfields = new HashMap<String, String>();
 
+  private String endpoint;
   private String regionName;
 
   @Override
@@ -92,11 +94,11 @@ public void open(Configuration conf, String name) throws IOException {
 
   @Override
   public void open(IndexWriterParams parameters) throws IOException {
-//    LOG.debug("CloudSearchIndexWriter.open() name={} ", name);
+    //    LOG.debug("CloudSearchIndexWriter.open() name={} ", name);
 
-    String endpoint = parameters.get(CloudSearchConstants.ENDPOINT);
-    dumpBatchFilesToTemp = parameters.getBoolean(CloudSearchConstants.BATCH_DUMP,
-        false);
+    endpoint = parameters.get(CloudSearchConstants.ENDPOINT);
+    dumpBatchFilesToTemp = parameters
+        .getBoolean(CloudSearchConstants.BATCH_DUMP, false);
     this.regionName = parameters.get(CloudSearchConstants.REGION);
 
     if (StringUtils.isBlank(endpoint) && !dumpBatchFilesToTemp) {
@@ -328,7 +330,7 @@ public void close() throws IOException {
     // This will flush any unsent documents.
     commit();
     // close the client
-    if (client != null){
+    if (client != null) {
       client.shutdown();
     }
   }
@@ -342,37 +344,34 @@ public void setConf(Configuration conf) {
     this.conf = conf;
   }
 
-  public String describe() {
-    String configuredEndpoint = null;
-    String configuredRegion = null;
-
-    // get the values set in the conf
-    if (getConf() != null) {
-      configuredEndpoint = getConf().get(CloudSearchConstants.ENDPOINT);
-      configuredRegion = getConf().get(CloudSearchConstants.REGION);
-    }
-
-    StringBuffer sb = new StringBuffer("CloudSearchIndexWriter\n");
-    sb.append("\t").append(CloudSearchConstants.ENDPOINT)
-        .append(" : URL of the CloudSearch domain's document endpoint.");
-    if (StringUtils.isNotBlank(configuredEndpoint)) {
-      sb.append(" (value: ").append(configuredEndpoint).append(")");
-    }
-    sb.append("\n");
-
-    sb.append("\t").append(CloudSearchConstants.REGION)
-        .append(" : name of the CloudSearch region.");
-    if (StringUtils.isNotBlank(configuredRegion)) {
-      sb.append(" (").append(configuredRegion).append(")");
-    }
-    sb.append("\n");
-    return sb.toString();
+  /**
+   * Returns {@link Map} with the specific parameters the IndexWriter instance can take.
+   *
+   * @return The values of each row. It must have the form <KEY,<DESCRIPTION,VALUE>>.
+   */
+  @Override
+  public Map<String, Entry<String, Object>> describe() {
+    Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
+
+    properties.put(CloudSearchConstants.ENDPOINT, new AbstractMap.SimpleEntry<>(
+        "Endpoint where service requests should be submitted.", this.endpoint));
+    properties.put(CloudSearchConstants.REGION,
+        new AbstractMap.SimpleEntry<>("Region name.", this.regionName));
+    properties.put(CloudSearchConstants.BATCH_DUMP,
+        new AbstractMap.SimpleEntry<>("true to send documents to a local file.",
+            this.dumpBatchFilesToTemp));
+    properties.put(CloudSearchConstants.MAX_DOCS_BATCH,
+        new AbstractMap.SimpleEntry<>(
+            "Maximum number of documents to send as a batch to CloudSearch.",
+            this.maxDocsInBatch));
+
+    return properties;
   }
 
   /**
    * Remove the non-cloudSearch-legal characters. Note that this might convert
    * two fields to the same name.
-   * 
+   *
    * @param name
    * @return
    */
diff --git a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java
index c17467aa0..0f83a0d6e 100644
--- a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java
+++ b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java
@@ -19,16 +19,23 @@
 
 import java.io.IOException;
 import java.nio.charset.Charset;
+import java.util.AbstractMap;
 import java.util.Date;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.ListIterator;
+import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.ToolRunner;
-import org.apache.nutch.indexer.*;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.IndexingJob;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.indexer.NutchField;
 import org.apache.nutch.util.NutchConfiguration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -178,35 +185,6 @@ protected int find(String value, int start) {
   private String outputPath = "csvindexwriter";
 
 
-  private static final String description =
-      " - write index as CSV file (comma separated values)"
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_FIELDS,
-          "ordered list of fields (columns) in the CSV file")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_FIELD_SEPARATOR,
-          "separator between fields (columns), default: , (U+002C, comma)")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_QUOTECHARACTER,
-          "quote character used to quote fields containing separators or quotes, "
-              + "default: \" (U+0022, quotation mark)")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_ESCAPECHARACTER,
-          "escape character used to escape a quote character, "
-              + "default: \" (U+0022, quotation mark)")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_VALUESEPARATOR,
-          "separator between multiple values of one field, "
-              + "default: | (U+007C)")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_MAXFIELDVALUES,
-          "max. number of values of one field, useful for, "
-              + " e.g., the anchor texts field, default: 12")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_MAXFIELDLENGTH,
-          "max. length of a single field value in characters, default: 4096.")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_CHARSET,
-          "encoding of CSV file, default: UTF-8")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_WITHHEADER,
-          "write CSV column headers, default: true")
-      + String.format("\n  %-24s : %s", CSVConstants.CSV_OUTPATH,
-          "output path / directory, default: csvindexwriter. "
-          + "\n    CAVEAT: existing output directories are removed!") + "\n";
-
-
   private FileSystem fs;
 
   protected FSDataOutputStream csvout;
@@ -345,9 +323,47 @@ public Configuration getConf() {
     return config;
   }
 
+  /**
+   * Returns {@link Map} with the specific parameters the IndexWriter instance can take.
+   *
+   * @return The values of each row. It must have the form <KEY,<DESCRIPTION,VALUE>>.
+   */
   @Override
-  public String describe() {
-    return getClass().getSimpleName() + description;
+  public Map<String, Map.Entry<String, Object>> describe() {
+    Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
+
+    properties.put(CSVConstants.CSV_FIELDS, new AbstractMap.SimpleEntry<>(
+        "Ordered list of fields (columns) in the CSV file",
+        this.fields == null ? "" : String.join(",", this.fields)));
+    properties.put(CSVConstants.CSV_FIELD_SEPARATOR, new AbstractMap.SimpleEntry<>(
+        "Separator between fields (columns), default: , (U+002C, comma)",
+        this.fieldSeparator));
+    properties.put(CSVConstants.CSV_QUOTECHARACTER, new AbstractMap.SimpleEntry<>(
+        "Quote character used to quote fields containing separators or quotes, default: \" (U+0022, quotation mark)",
+        this.quoteCharacter));
+    properties.put(CSVConstants.CSV_ESCAPECHARACTER, new AbstractMap.SimpleEntry<>(
+        "Escape character used to escape a quote character, default: \" (U+0022, quotation mark)",
+        this.escapeCharacter));
+    properties.put(CSVConstants.CSV_VALUESEPARATOR, new AbstractMap.SimpleEntry<>(
+        "Separator between multiple values of one field, default: | (U+007C)",
+        this.valueSeparator));
+    properties.put(CSVConstants.CSV_MAXFIELDVALUES, new AbstractMap.SimpleEntry<>(
+        "Max. number of values of one field, useful for, e.g., the anchor texts field, default: 12",
+        this.maxFieldValues));
+    properties.put(CSVConstants.CSV_MAXFIELDLENGTH, new AbstractMap.SimpleEntry<>(
+        "Max. length of a single field value in characters, default: 4096",
+        this.maxFieldLength));
+    properties.put(CSVConstants.CSV_CHARSET, new AbstractMap.SimpleEntry<>(
+        "Encoding of CSV file, default: UTF-8",
+        this.encoding));
+    properties.put(CSVConstants.CSV_WITHHEADER, new AbstractMap.SimpleEntry<>(
+        "Write CSV column headers, default: true",
+        this.withHeader));
+    properties.put(CSVConstants.CSV_OUTPATH, new AbstractMap.SimpleEntry<>(
+        "Output path / directory, default: csvindexwriter. ",
+        this.outputPath));
+
+    return properties;
   }
 
   @Override
diff --git a/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java b/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java
index 7a4003674..d809f66b1 100644
--- a/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java
+++ b/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java
@@ -21,6 +21,10 @@
 import java.io.IOException;
 import java.io.FileWriter;
 import java.io.Writer;
+import java.util.AbstractMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.nutch.indexer.IndexWriter;
@@ -35,47 +39,51 @@
  * and add.
  */
 public class DummyIndexWriter implements IndexWriter {
+
   private static final Logger LOG = LoggerFactory
       .getLogger(MethodHandles.lookup().lookupClass());
+
   private Configuration config;
   private Writer writer;
+
   private boolean delete = false;
+  private String path;
 
   public void open(Configuration conf, String name) throws IOException {
-      //Implementation not required
+    //Implementation not required
   }
 
-    /**
-     * Initializes the internal variables from a given index writer configuration.
-     *
-     * @param parameters Params from the index writer configuration.
-     * @throws IOException Some exception thrown by writer.
-     */
-    @Override
-    public void open(IndexWriterParams parameters) throws IOException {
-        delete = parameters.getBoolean(DummyConstants.DELETE, false);
-
-        String path = parameters.get(DummyConstants.PATH, "/");
-        if (path == null) {
-            String message = "Missing path.";
-            message += "\n" + describe();
-            LOG.error(message);
-            throw new RuntimeException(message);
-        }
-
-        if (writer != null) {
-            LOG.warn("Dummy index file already open for writing");
-            return;
-        }
-
-        try {
-            LOG.debug("Opening dummy index file {}", path);
-            writer = new BufferedWriter(new FileWriter(path));
-        } catch (IOException ex) {
-            LOG.error("Failed to open index file {}: {}", path,
-                    StringUtils.stringifyException(ex));
-        }
+  /**
+   * Initializes the internal variables from a given index writer configuration.
+   *
+   * @param parameters Params from the index writer configuration.
+   * @throws IOException Some exception thrown by writer.
+   */
+  @Override
+  public void open(IndexWriterParams parameters) throws IOException {
+    delete = parameters.getBoolean(DummyConstants.DELETE, false);
+
+    path = parameters.get(DummyConstants.PATH, "/");
+    if (path == null) {
+      String message = "Missing path.";
+      message += "\n" + describe();
+      LOG.error(message);
+      throw new RuntimeException(message);
+    }
+
+    if (writer != null) {
+      LOG.warn("Dummy index file already open for writing");
+      return;
+    }
+
+    try {
+      LOG.debug("Opening dummy index file {}", path);
+      writer = new BufferedWriter(new FileWriter(path));
+    } catch (IOException ex) {
+      LOG.error("Failed to open index file {}: {}", path,
+          StringUtils.stringifyException(ex));
     }
+  }
 
   @Override
   public void delete(String key) throws IOException {
@@ -115,10 +123,20 @@ public void setConf(Configuration conf) {
     config = conf;
   }
 
-  public String describe() {
-    StringBuffer sb = new StringBuffer("DummyIndexWriter\n");
-    sb.append("\t").append(
-        "dummy.path : Path of the file to write to (mandatory)\n");
-    return sb.toString();
+  /**
+   * Returns {@link Map} with the specific parameters the IndexWriter instance can take.
+   *
+   * @return The values of each row. It must have the form <KEY,<DESCRIPTION,VALUE>>.
+   */
+  @Override
+  public Map<String, Map.Entry<String, Object>> describe() {
+    Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
+
+    properties.put(DummyConstants.DELETE, new AbstractMap.SimpleEntry<>(
+        "If delete operations should be written to the file.", this.delete));
+    properties.put(DummyConstants.PATH, new AbstractMap.SimpleEntry<>(
+        "Path where the file will be created.", this.path));
+
+    return properties;
   }
 }
diff --git a/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java b/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
index f40f0b8fc..0ddf539d3 100644
--- a/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
+++ b/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
@@ -51,11 +51,13 @@
 import java.security.NoSuchAlgorithmException;
 import java.security.cert.CertificateException;
 import java.security.cert.X509Certificate;
+import java.util.AbstractMap;
+import java.util.Date;
 import java.util.HashMap;
-import java.util.Map;
+import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
+import java.util.Map;
 import java.util.Set;
-import java.util.Date;
 import java.util.concurrent.ExecutionException;
 
 /**
@@ -372,18 +374,74 @@ public void close() throws IOException {
     client.shutdownClient();
   }
 
+  /**
+   * Returns {@link Map} with the specific parameters the IndexWriter instance can take.
+   *
+   * @return The values of each row. It must have the form <KEY,<DESCRIPTION,VALUE>>.
+   */
   @Override
-  public String describe() {
-    StringBuffer sb = new StringBuffer("ElasticRestIndexWriter\n");
-    sb.append("\t").append(ElasticRestConstants.HOST).append(" : hostname\n");
-    sb.append("\t").append(ElasticRestConstants.PORT).append(" : port\n");
-    sb.append("\t").append(ElasticRestConstants.INDEX)
-        .append(" : elastic index command \n");
-    sb.append("\t").append(ElasticRestConstants.MAX_BULK_DOCS)
-        .append(" : elastic bulk index doc counts. (default 250) \n");
-    sb.append("\t").append(ElasticRestConstants.MAX_BULK_LENGTH)
-        .append(" : elastic bulk index length. (default 2500500 ~2.5MB)\n");
-    return sb.toString();
+  public Map<String, Map.Entry<String, Object>> describe() {
+    Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
+
+    properties.put(ElasticRestConstants.HOST, new AbstractMap.SimpleEntry<>(
+        "The hostname or a list of comma separated hostnames to send documents "
+            + "to using Elasticsearch Jest. Both host and port must be defined.",
+        this.host));
+    properties.put(ElasticRestConstants.PORT, new AbstractMap.SimpleEntry<>(
+        "The port to connect to using Elasticsearch Jest.", this.port));
+    properties.put(ElasticRestConstants.INDEX,
+        new AbstractMap.SimpleEntry<>("Default index to send documents to.",
+            this.defaultIndex));
+    properties.put(ElasticRestConstants.MAX_BULK_DOCS,
+        new AbstractMap.SimpleEntry<>(
+            "Maximum size of the bulk in number of documents.",
+            this.maxBulkDocs));
+    properties.put(ElasticRestConstants.MAX_BULK_LENGTH,
+        new AbstractMap.SimpleEntry<>("Maximum size of the bulk in bytes.",
+            this.maxBulkLength));
+
+    properties.put(ElasticRestConstants.USER, new AbstractMap.SimpleEntry<>(
+        "Username for auth credentials (only used when https is enabled)",
+        this.user));
+    properties.put(ElasticRestConstants.PASSWORD, new AbstractMap.SimpleEntry<>(
+        "Password for auth credentials (only used when https is enabled)",
+        this.password));
+    properties.put(ElasticRestConstants.TYPE,
+        new AbstractMap.SimpleEntry<>("Default type to send documents to.",
+            this.defaultType));
+    properties.put(ElasticRestConstants.HTTPS, new AbstractMap.SimpleEntry<>(
+        "true to enable https, false to disable https. If you've disabled http "
+            + "access (by forcing https), be sure to set this to true, otherwise "
+            + "you might get \"connection reset by peer\".", this.https));
+    properties.put(ElasticRestConstants.HOSTNAME_TRUST,
+        new AbstractMap.SimpleEntry<>(
+            "true to trust elasticsearch server's certificate even if its listed "
+                + "domain name does not match the domain they are hosted or false "
+                + "to check if the elasticsearch server's certificate's listed "
+                + "domain is the same domain that it is hosted on, and if "
+                + "it doesn't, then fail to index (only used when https is enabled)",
+            this.trustAllHostnames));
+
+    properties.put(ElasticRestConstants.LANGUAGES,
+        new AbstractMap.SimpleEntry<>(
+            "A list of strings denoting the supported languages (e.g. en, de, fr, it). "
+                + "If this value is empty all documents will be sent to index property. "
+                + "If not empty the Rest client will distribute documents in different "
+                + "indices based on their languages property. Indices are named with the "
+                + "following schema: index separator language (e.g. nutch_de). "
+                + "Entries with an unsupported languages value will be added to "
+                + "index index separator sink (e.g. nutch_others).",
+            this.languages == null ? "" : String.join(",", languages)));
+    properties.put(ElasticRestConstants.SEPARATOR,
+        new AbstractMap.SimpleEntry<>(
+            "Is used only if languages property is defined to build the index name "
+                + "(i.e. index separator lang).", this.separator));
+    properties.put(ElasticRestConstants.SINK, new AbstractMap.SimpleEntry<>(
+        "Is used only if languages property is defined to build the index name "
+            + "where to store documents with unsupported languages "
+            + "(i.e. index separator sink).", this.sink));
+
+    return properties;
   }
 
   @Override
diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
index 5fa2def90..d9a1b3e1a 100644
--- a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
+++ b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
@@ -21,7 +21,9 @@
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.net.InetAddress;
+import java.util.AbstractMap;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
@@ -65,6 +67,15 @@
   private static final int DEFAULT_BULK_CLOSE_TIMEOUT = 600;
   private static final String DEFAULT_INDEX = "nutch";
 
+  private String cluster;
+  private String[] hosts;
+  private int port;
+
+  private int maxBulkDocs;
+  private int maxBulkLength;
+  private int expBackoffMillis;
+  private int expBackoffRetries;
+
   private String defaultIndex;
   private Client client;
   private Node node;
@@ -87,7 +98,7 @@ public void open(Configuration conf, String name) throws IOException {
    */
   @Override
   public void open(IndexWriterParams parameters) throws IOException {
-    String cluster = parameters.get(ElasticConstants.CLUSTER);
+    cluster = parameters.get(ElasticConstants.CLUSTER);
     String hosts = parameters.get(ElasticConstants.HOSTS);
 
     if (StringUtils.isBlank(cluster) && StringUtils.isBlank(hosts)) {
@@ -101,14 +112,14 @@ public void open(IndexWriterParams parameters) throws IOException {
         DEFAULT_BULK_CLOSE_TIMEOUT);
     defaultIndex = parameters.get(ElasticConstants.INDEX, DEFAULT_INDEX);
 
-    int maxBulkDocs = parameters
+    maxBulkDocs = parameters
         .getInt(ElasticConstants.MAX_BULK_DOCS, DEFAULT_MAX_BULK_DOCS);
-    int maxBulkLength = parameters
+    maxBulkLength = parameters
         .getInt(ElasticConstants.MAX_BULK_LENGTH, DEFAULT_MAX_BULK_LENGTH);
-    int expBackoffMillis = parameters
+    expBackoffMillis = parameters
         .getInt(ElasticConstants.EXPONENTIAL_BACKOFF_MILLIS,
             DEFAULT_EXP_BACKOFF_MILLIS);
-    int expBackoffRetries = parameters
+    expBackoffRetries = parameters
         .getInt(ElasticConstants.EXPONENTIAL_BACKOFF_RETRIES,
             DEFAULT_EXP_BACKOFF_RETRIES);
 
@@ -128,9 +139,8 @@ public void open(IndexWriterParams parameters) throws IOException {
    * Generates a TransportClient or NodeClient
    */
   protected Client makeClient(IndexWriterParams parameters) throws IOException {
-    String clusterName = parameters.get(ElasticConstants.CLUSTER);
-    String[] hosts = parameters.getStrings(ElasticConstants.HOSTS);
-    int port = parameters.getInt(ElasticConstants.PORT, DEFAULT_PORT);
+    hosts = parameters.getStrings(ElasticConstants.HOSTS);
+    port = parameters.getInt(ElasticConstants.PORT, DEFAULT_PORT);
 
     Settings.Builder settingsBuilder = Settings.builder();
 
@@ -149,8 +159,8 @@ protected Client makeClient(IndexWriterParams parameters) throws IOException {
     }
 
     // Set the cluster name and build the settings
-    if (StringUtils.isNotBlank(clusterName)) {
-      settingsBuilder.put("cluster.name", clusterName);
+    if (StringUtils.isNotBlank(cluster)) {
+      settingsBuilder.put("cluster.name", cluster);
     }
 
     Settings settings = settingsBuilder.build();
@@ -166,7 +176,7 @@ protected Client makeClient(IndexWriterParams parameters) throws IOException {
         transportClient.addTransportAddress(
             new InetSocketTransportAddress(InetAddress.getByName(host), port));
       client = transportClient;
-    } else if (clusterName != null) {
+    } else if (cluster != null) {
       node = new Node(settings);
       client = node.client();
     }
@@ -255,31 +265,49 @@ public void close() throws IOException {
     }
   }
 
+  /**
+   * Returns {@link Map} with the specific parameters the IndexWriter instance can take.
+   *
+   * @return The values of each row. It must have the form <KEY,<DESCRIPTION,VALUE>>.
+   */
   @Override
-  public String describe() {
-    StringBuffer sb = new StringBuffer("ElasticIndexWriter\n");
-    sb.append("\t").append(ElasticConstants.CLUSTER)
-        .append(" : elastic prefix cluster\n");
-    sb.append("\t").append(ElasticConstants.HOSTS).append(" : hostname\n");
-    sb.append("\t").append(ElasticConstants.PORT).append(" : port\n");
-    sb.append("\t").append(ElasticConstants.INDEX)
-        .append(" : elastic index command \n");
-    sb.append("\t").append(ElasticConstants.MAX_BULK_DOCS)
-        .append(" : elastic bulk index doc counts. (default ")
-        .append(DEFAULT_MAX_BULK_DOCS).append(")\n");
-    sb.append("\t").append(ElasticConstants.MAX_BULK_LENGTH)
-        .append(" : elastic bulk index length in bytes. (default ")
-        .append(DEFAULT_MAX_BULK_LENGTH).append(")\n");
-    sb.append("\t").append(ElasticConstants.EXPONENTIAL_BACKOFF_MILLIS).append(
-        " : elastic bulk exponential backoff initial delay in milliseconds. (default ")
-        .append(DEFAULT_EXP_BACKOFF_MILLIS).append(")\n");
-    sb.append("\t").append(ElasticConstants.EXPONENTIAL_BACKOFF_RETRIES)
-        .append(" : elastic bulk exponential backoff max retries. (default ")
-        .append(DEFAULT_EXP_BACKOFF_RETRIES).append(")\n");
-    sb.append("\t").append(ElasticConstants.BULK_CLOSE_TIMEOUT)
-        .append(" : elastic timeout for the last bulk in seconds. (default ")
-        .append(DEFAULT_BULK_CLOSE_TIMEOUT).append(")\n");
-    return sb.toString();
+  public Map<String, Map.Entry<String, Object>> describe() {
+    Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
+
+    properties.put(ElasticConstants.CLUSTER, new AbstractMap.SimpleEntry<>(
+        "The cluster name to discover. Either host and port must be defined or cluster.",
+        this.cluster));
+    properties.put(ElasticConstants.HOSTS, new AbstractMap.SimpleEntry<>(
+        "Ordered list of fields (columns) in the CSV fileComma-separated list of "
+            + "hostnames to send documents to using TransportClient. "
+            + "Either host and port must be defined or cluster.",
+        this.hosts == null ? "" : String.join(",", hosts)));
+    properties.put(ElasticConstants.PORT, new AbstractMap.SimpleEntry<>(
+        "The port to connect to using TransportClient.", this.port));
+    properties.put(ElasticConstants.INDEX,
+        new AbstractMap.SimpleEntry<>("Default index to send documents to.",
+            this.defaultIndex));
+    properties.put(ElasticConstants.MAX_BULK_DOCS,
+        new AbstractMap.SimpleEntry<>(
+            "Maximum size of the bulk in number of documents.",
+            this.maxBulkDocs));
+    properties.put(ElasticConstants.MAX_BULK_LENGTH,
+        new AbstractMap.SimpleEntry<>("Maximum size of the bulk in bytes.",
+            this.maxBulkLength));
+    properties.put(ElasticConstants.EXPONENTIAL_BACKOFF_MILLIS,
+        new AbstractMap.SimpleEntry<>(
+            "Initial delay for the BulkProcessor exponential backoff policy.",
+            this.expBackoffMillis));
+    properties.put(ElasticConstants.EXPONENTIAL_BACKOFF_RETRIES,
+        new AbstractMap.SimpleEntry<>(
+            "Number of times the BulkProcessor exponential backoff policy should retry bulk operations.",
+            this.expBackoffRetries));
+    properties.put(ElasticConstants.BULK_CLOSE_TIMEOUT,
+        new AbstractMap.SimpleEntry<>(
+            "Number of seconds allowed for the BulkProcessor to complete its last operation.",
+            this.bulkCloseTimeout));
+
+    return properties;
   }
 
   @Override
diff --git a/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java b/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java
index 301d7e9dc..616ee8d9c 100644
--- a/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java
+++ b/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java
@@ -30,7 +30,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.AbstractMap;
 import java.util.Arrays;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -39,7 +41,11 @@
   public static final Logger LOG = LoggerFactory
       .getLogger(RabbitIndexWriter.class);
 
+  private String uri;
+
   private String exchange;
+  private String exchangeOptions;
+
   private String routingKey;
 
   private int commitSize;
@@ -48,6 +54,12 @@
   private String headersStatic;
   private List<String> headersDynamic;
 
+  private boolean binding;
+  private String bindingArguments;
+
+  private String queueName;
+  private String queueOptions;
+
   private Configuration config;
 
   private RabbitMessage rabbitMessage = new RabbitMessage();
@@ -87,19 +99,19 @@ public void open(IndexWriterParams parameters) throws IOException {
     headersDynamic = Arrays
         .asList(parameters.getStrings(RabbitMQConstants.HEADERS_DYNAMIC, ""));
 
-    String uri = parameters.get(RabbitMQConstants.SERVER_URI);
+    uri = parameters.get(RabbitMQConstants.SERVER_URI);
 
     client = new RabbitMQClient(uri);
     client.openChannel();
 
-    boolean binding = parameters.getBoolean(RabbitMQConstants.BINDING, false);
+    binding = parameters.getBoolean(RabbitMQConstants.BINDING, false);
     if (binding) {
-      String queueName = parameters.get(RabbitMQConstants.QUEUE_NAME);
-      String queueOptions = parameters.get(RabbitMQConstants.QUEUE_OPTIONS);
+      queueName = parameters.get(RabbitMQConstants.QUEUE_NAME);
+      queueOptions = parameters.get(RabbitMQConstants.QUEUE_OPTIONS);
 
-      String exchangeOptions = parameters.get(RabbitMQConstants.EXCHANGE_OPTIONS);
+      exchangeOptions = parameters.get(RabbitMQConstants.EXCHANGE_OPTIONS);
 
-      String bindingArguments = parameters
+      bindingArguments = parameters
           .get(RabbitMQConstants.BINDING_ARGUMENTS, "");
 
       client
@@ -199,33 +211,71 @@ public void close() throws IOException {
     client.close();
   }
 
-  public String describe() {
-    StringBuffer sb = new StringBuffer("RabbitIndexWriter\n");
-    sb.append("\t").append(RabbitMQConstants.SERVER_URI)
-        .append(" : URI of RabbitMQ server\n");
-    sb.append("\t").append(RabbitMQConstants.BINDING).append(
-        " : If binding is created automatically or not (default true)\n");
-    sb.append("\t").append(RabbitMQConstants.BINDING_ARGUMENTS)
-        .append(" : Arguments used in binding\n");
-    sb.append("\t").append(RabbitMQConstants.EXCHANGE_NAME)
-        .append(" : Exchange's name\n");
-    sb.append("\t").append(RabbitMQConstants.EXCHANGE_OPTIONS)
-        .append(" : Exchange's options\n");
-    sb.append("\t").append(RabbitMQConstants.QUEUE_NAME)
-        .append(" : Queue's name\n");
-    sb.append("\t").append(RabbitMQConstants.QUEUE_OPTIONS)
-        .append(" : Queue's options\n");
-    sb.append("\t").append(RabbitMQConstants.ROUTING_KEY)
-        .append(" : Routing key\n");
-    sb.append("\t").append(RabbitMQConstants.COMMIT_SIZE)
-        .append(" : Buffer size when sending to RabbitMQ (default 250)\n");
-    sb.append("\t").append(RabbitMQConstants.COMMIT_MODE)
-        .append(" : The mode to send the documents (default multiple)\n");
-    sb.append("\t").append(RabbitMQConstants.HEADERS_STATIC)
-        .append(" : Static headers that will be added to the messages\n");
-    sb.append("\t").append(RabbitMQConstants.HEADERS_DYNAMIC)
-        .append(" : Document's fields added as headers\n");
-    return sb.toString();
+  /**
+   * Returns {@link Map} with the specific parameters the IndexWriter instance can take.
+   *
+   * @return The values of each row. It must have the form <KEY,<DESCRIPTION,VALUE>>.
+   */
+  @Override
+  public Map<String, Map.Entry<String, Object>> describe() {
+    Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
+
+    properties.put(RabbitMQConstants.SERVER_URI, new AbstractMap.SimpleEntry<>(
+        "URI with connection parameters in the form amqp://<username>:<password>@<hostname>:<port>/<virtualHost>",
+        this.uri));
+    properties.put(RabbitMQConstants.BINDING, new AbstractMap.SimpleEntry<>(
+        "Whether the relationship between an exchange and a queue is created automatically. "
+            + "NOTE: Binding between exchanges is not supported.",
+        this.binding));
+    properties.put(RabbitMQConstants.BINDING_ARGUMENTS,
+        new AbstractMap.SimpleEntry<>(
+            "Arguments used in binding. It must have the form key1=value1,key2=value2. "
+                + "This value is only used when the exchange's type is headers and "
+                + "the value of binding property is true. In other cases is ignored.",
+            this.bindingArguments));
+    properties.put(RabbitMQConstants.EXCHANGE_NAME,
+        new AbstractMap.SimpleEntry<>(
+            "Name for the exchange where the messages will be sent.",
+            this.exchange));
+    properties.put(RabbitMQConstants.EXCHANGE_OPTIONS,
+        new AbstractMap.SimpleEntry<>(
+            "Options used when the exchange is created. Only used when the value of binding property is true. "
+                + "It must have the form type=<type>,durable=<durable>",
+            this.exchangeOptions));
+    properties.put(RabbitMQConstants.QUEUE_NAME, new AbstractMap.SimpleEntry<>(
+        "Name of the queue used to create the binding. Only used when the value "
+            + "of binding property is true.", this.queueName));
+    properties.put(RabbitMQConstants.QUEUE_OPTIONS,
+        new AbstractMap.SimpleEntry<>(
+            "Options used when the queue is created. Only used when the value of "
+                + "binding property is true. It must have the form "
+                + "durable=<durable>,exclusive=<exclusive>,auto-delete=<auto-delete>,arguments=<arguments>",
+            this.queueOptions));
+    properties.put(RabbitMQConstants.ROUTING_KEY, new AbstractMap.SimpleEntry<>(
+        "The routing key used to route messages in the exchange. "
+            + "It only makes sense when the exchange type is topic or direct.",
+        this.routingKey));
+    properties.put(RabbitMQConstants.COMMIT_MODE, new AbstractMap.SimpleEntry<>(
+        "single if a message contains only one document. "
+            + "In this case, a header with the action (write, update or delete) will be added. "
+            + "multiple if a message contains all documents.",
+        this.commitMode));
+    properties.put(RabbitMQConstants.COMMIT_SIZE, new AbstractMap.SimpleEntry<>(
+        "Amount of documents to send into each message if the value of commit.mode "
+            + "property is multiple. In single mode this value represents "
+            + "the amount of messages to be sent.", this.commitSize));
+    properties.put(RabbitMQConstants.HEADERS_STATIC,
+        new AbstractMap.SimpleEntry<>(
+            "Headers to add to each message. It must have the form key1=value1,key2=value2.",
+            this.headersStatic));
+    properties.put(RabbitMQConstants.HEADERS_DYNAMIC,
+        new AbstractMap.SimpleEntry<>(
+            "Document's fields to add as headers to each message. "
+                + "It must have the form field1,field2. "
+                + "Only used when the value of commit.mode property is single",
+            this.headersDynamic));
+
+    return properties;
   }
 
   private void addHeaders(final RabbitMQMessage message,
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
index 24172c6bc..475d313d7 100644
--- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
@@ -21,10 +21,13 @@
 import java.io.UnsupportedEncodingException;
 import java.net.URLDecoder;
 import java.time.format.DateTimeFormatter;
+import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Date;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Map.Entry;
 
 import org.apache.hadoop.conf.Configuration;
@@ -56,6 +59,10 @@
 
   private final List<String> deleteIds = new ArrayList<>();
 
+  private String type;
+  private String[] urls;
+  private String collection;
+
   private int batchSize;
   private int numDeletes = 0;
   private int totalAdds = 0;
@@ -79,9 +86,9 @@ public void open(Configuration conf, String name) {
    */
   @Override
   public void open(IndexWriterParams parameters) {
-    String type = parameters.get(SolrConstants.SERVER_TYPE, "http");
-
-    String[] urls = parameters.getStrings(SolrConstants.SERVER_URLS);
+    this.type = parameters.get(SolrConstants.SERVER_TYPE, "http");
+    this.urls = parameters.getStrings(SolrConstants.SERVER_URLS);
+    this.collection = parameters.get(SolrConstants.COLLECTION);
 
     if (urls == null) {
       String message = "Missing SOLR URL.\n" + describe();
@@ -106,7 +113,7 @@ public void open(IndexWriterParams parameters) {
           SolrUtils.getCloudSolrClient(Arrays.asList(urls), this.username,
               this.password) :
           SolrUtils.getCloudSolrClient(Arrays.asList(urls));
-      sc.setDefaultCollection(parameters.get(SolrConstants.COLLECTION));
+      sc.setDefaultCollection(this.collection);
       solrClients.add(sc);
       break;
     case "concurrent":
@@ -288,25 +295,43 @@ public void setConf(Configuration conf) {
   }
 
   /**
-   * Returns a String describing the IndexWriter instance and the specific parameters it can take.
+   * Returns {@link Map} with the specific parameters the IndexWriter instance can take.
    *
-   * @return The full description.
+   * @return The values of each row. It must have the form <KEY,<DESCRIPTION,VALUE>>.
    */
   @Override
-  public String describe() {
-    StringBuffer sb = new StringBuffer("SOLRIndexWriter\n");
-    sb.append("\t").append(SolrConstants.SERVER_TYPE).append(
-        " : Type of the server. Can be: \"cloud\", \"concurrent\", \"http\" or \"lb\"\n");
-    sb.append("\t").append(SolrConstants.SERVER_URLS)
-        .append(" : URL of the SOLR instance or URL of the Zookeeper quorum\n");
-    sb.append("\t").append(SolrConstants.COMMIT_SIZE)
-        .append(" : buffer size when sending to SOLR (default 1000)\n");
-    sb.append("\t").append(SolrConstants.USE_AUTH)
-        .append(" : use authentication (default false)\n");
-    sb.append("\t").append(SolrConstants.USERNAME)
-        .append(" : username for authentication\n");
-    sb.append("\t").append(SolrConstants.PASSWORD)
-        .append(" : password for authentication\n");
-    return sb.toString();
+  public Map<String, Entry<String, Object>> describe() {
+    Map<String, Entry<String, Object>> properties = new LinkedHashMap<>();
+
+    properties.put(SolrConstants.SERVER_TYPE, new AbstractMap.SimpleEntry<>(
+        "Specifies the SolrClient implementation to use. This is a string value of one of the following \"cloud\" or \"http\"."
+            + " The values represent CloudSolrServer or HttpSolrServer respectively.",
+        this.type));
+    properties.put(SolrConstants.SERVER_URLS, new AbstractMap.SimpleEntry<>(
+        "Defines the fully qualified URL of Solr into which data should be indexed. Multiple URL can be provided using comma as a delimiter."
+            + " When the value of type property is cloud, the URL should not include any collections or cores; just the root Solr path.",
+        this.urls == null ? "" : String.join(",", urls)));
+    properties.put(SolrConstants.COLLECTION, new AbstractMap.SimpleEntry<>(
+        "The collection used in requests. Only used when the value of type property is cloud.",
+        this.collection));
+    properties.put(SolrConstants.COMMIT_SIZE, new AbstractMap.SimpleEntry<>(
+        "Defines the number of documents to send to Solr in a single update batch. "
+            + "Decrease when handling very large documents to prevent Nutch from running out of memory.\n"
+            + "Note: It does not explicitly trigger a server side commit.",
+        this.batchSize));
+    properties.put(SolrConstants.WEIGHT_FIELD, new AbstractMap.SimpleEntry<>(
+        "Field's name where the weight of the documents will be written. If it is empty no field will be used.",
+        this.weightField));
+    properties.put(SolrConstants.USE_AUTH, new AbstractMap.SimpleEntry<>(
+        "Whether to enable HTTP basic authentication for communicating with Solr. Use the username and password properties to configure your credentials.",
+        this.auth));
+    properties.put(SolrConstants.USERNAME,
+        new AbstractMap.SimpleEntry<>("The username of Solr server.",
+            this.username));
+    properties.put(SolrConstants.PASSWORD,
+        new AbstractMap.SimpleEntry<>("The password of Solr server.",
+            this.password));
+
+    return properties;
   }
 }


 



> Configuration values in the description of index writers
> --------------------------------------------------------
>
>                 Key: NUTCH-2602
>                 URL: https://issues.apache.org/jira/browse/NUTCH-2602
>             Project: Nutch
>          Issue Type: Improvement
>          Components: indexer, plugin
>    Affects Versions: 1.15
>            Reporter: Roannel Fernández Hernández
>            Assignee: Roannel Fernández Hernández
>            Priority: Minor
>             Fix For: 1.16
>
>         Attachments: Nutch output.png
>
>
> Since [GitHub Pull Request #218|https://github.com/apache/nutch/pull/218], when you have 2+ different configurations of the same index writer (the same implementation class), the index command prints the same description several times. I propose that the {{describe()}} method show the values of its own configuration rather than a generic one.
>  
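
To illustrate the proposal, a sketch of two instances of the same (hypothetical) writer class: since describe() now reports each instance's own configured values, the two descriptions differ wherever the configurations differ:

    import java.util.AbstractMap;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class PerInstanceDescribeDemo {
      private final String url;

      PerInstanceDescribeDemo(String url) {
        this.url = url;
      }

      Map<String, Map.Entry<String, Object>> describe() {
        Map<String, Map.Entry<String, Object>> props = new LinkedHashMap<>();
        props.put("server.url", new AbstractMap.SimpleEntry<>(
            "URL of the server documents are sent to.", url));
        return props;
      }

      public static void main(String[] args) {
        // Two configurations of the same implementation class no longer
        // print identical descriptions: the values are per instance.
        new PerInstanceDescribeDemo("http://solr-a:8983/solr").describe()
            .forEach((k, e) -> System.out.println(k + " = " + e.getValue()));
        new PerInstanceDescribeDemo("http://solr-b:8983/solr").describe()
            .forEach((k, e) -> System.out.println(k + " = " + e.getValue()));
      }
    }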


