You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/02/15 22:48:31 UTC

[2/2] kudu git commit: flume: Fix RegexpKuduOperationsProducer javadocs

flume: Fix RegexpKuduOperationsProducer javadocs

This fixes some oversights in the Javadoc documentation of the Flume
RegexpKuduOperationsProducer. It also makes the JDK8 javadoc linter pass
(for this file only).

It also makes a minor improvement to avoid calling toLowerCase() in a
loop and changes a couple of loose strings to be constants.

Change-Id: I47fc65808f03e27872900a58b686b9204e46a2c6
Reviewed-on: http://gerrit.cloudera.org:8080/6020
Reviewed-by: Will Berkeley <wd...@gmail.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/987cbc23
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/987cbc23
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/987cbc23

Branch: refs/heads/master
Commit: 987cbc23f35e424fbfb1b5a7f9df3377d7dc46da
Parents: 79ffb17
Author: Mike Percy <mp...@cloudera.com>
Authored: Wed Feb 15 12:59:07 2017 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Wed Feb 15 22:48:09 2017 +0000

----------------------------------------------------------------------
 .../kudu/flume/sink/KuduOperationsProducer.java |  4 +-
 .../sink/RegexpKuduOperationsProducer.java      | 80 +++++++++++---------
 2 files changed, 46 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/987cbc23/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java
----------------------------------------------------------------------
diff --git a/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java b/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java
index b38f387..6c9c3cd 100644
--- a/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java
+++ b/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/KuduOperationsProducer.java
@@ -45,8 +45,8 @@ public interface KuduOperationsProducer extends Configurable, AutoCloseable {
 
   /**
    * Returns the operations that should be written to Kudu as a result of this event.
-   * @return List of {@link org.apache.kudu.client.Operation} that
-   * should be written to Kudu
+   * @param event Event to convert to one or more Operations
+   * @return List of Operations that should be written to Kudu
    */
   List<Operation> getOperations(Event event);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/987cbc23/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java
----------------------------------------------------------------------
diff --git a/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java b/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java
index 4e261ff..14fd345 100644
--- a/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java
+++ b/java/kudu-flume-sink/src/main/java/org/apache/kudu/flume/sink/RegexpKuduOperationsProducer.java
@@ -45,71 +45,80 @@ import org.apache.kudu.client.PartialRow;
 import org.apache.kudu.client.Upsert;
 
 /**
- * A regular expression serializer that generates one {@link Insert} or
- * {@link Upsert} per {@link Event} by parsing the payload into values using a
- * regular expression. Values are coerced to the proper column types.
+ * A regular expression operations producer that generates one or more Kudu
+ * {@link Insert} or {@link Upsert} operations per Flume {@link Event} by
+ * parsing the event {@code body} using a regular expression. Values are
+ * coerced to the types of the named columns in the Kudu table.
  *
- * Example: if the Kudu table has the schema
+ * <p>Example: If the Kudu table has the schema:
  *
+ * <pre>
  * key INT32
- * name STRING
+ * name STRING</pre>
  *
- * and producer.pattern is '(?&lt;key&gt;\\d+),(?&lt;name&gt;\w+)', then the
- * RegexpKuduOperationsProducer will parse the string
+ * <p>and {@code producer.pattern = (?<key>\\d+),(?<name>\\w+)} then
+ * {@code RegexpKuduOperationsProducer} will parse the string:
  *
- * |12345,Mike||54321,Todd|
+ * <pre>|12345,Mike||54321,Todd|</pre>
  *
- * into the rows (key=12345, name=Mike) and (key=54321, name=Todd).
+ * into the rows: {@code (key=12345, name=Mike)} and {@code (key=54321, name=Todd)}.
  *
- * Note: this class relies on JDK7 named capturing groups, which are documented
- * in {@link Pattern}.
+ * <p>Note: This class relies on JDK7 named capturing groups, which are
+ * documented in {@link Pattern}. The name of each capturing group must
+ * correspond to a column name in the destination Kudu table.
  *
- * <p><strong>Regular Expression Kudu Operations Producer configuration parameters</strong></p>
+ * <p><strong><code>RegexpKuduOperationsProducer</code> Flume Configuration Parameters</strong></p>
  *
- * <table cellpadding=3 cellspacing=0 border=1>
+ * <table cellpadding=3 cellspacing=0 border=1 summary="Flume Configuration Parameters">
  * <tr>
  *   <th>Property Name</th>
  *   <th>Default</th>
  *   <th>Required?</th>
  *   <th>Description</th>
  * </tr>
- * <tr></tr><td>producer.pattern</td><td></td><td>Yes</td>
- * <td>The regular expression used to parse the event body.</td>
+ * <tr>
+ *   <td>producer.pattern</td>
+ *   <td></td>
+ *   <td>Yes</td>
+ *   <td>The regular expression used to parse the event body.</td>
  * </tr>
  * <tr>
  *   <td>producer.charset</td>
  *   <td>utf-8</td>
  *   <td>No</td>
- *   <td>The charset of the event body.</td>
+ *   <td>The character set of the event body.</td>
  * </tr>
  * <tr>
  *   <td>producer.operation</td>
  *   <td>upsert</td>
  *   <td>No</td>
- *   <td>Operation type used to write the event to Kudu. Must be 'insert' or
- *   'upsert'.</td>
+ *   <td>Operation type used to write the event to Kudu. Must be either
+ *   {@code insert} or {@code upsert}.</td>
  * </tr>
  * <tr>
  *   <td>producer.skipMissingColumn</td>
  *   <td>false</td>
  *   <td>No</td>
- *   <td>Whether to ignore a column if it has no corresponding capture group, or
- *   instead completely abandon the attempt to parse and insert/upsert the row.
+ *   <td>What to do if a column in the Kudu table has no corresponding capture group.
+ *   If set to {@code true}, a warning message is logged and the operation is still attempted.
+ *   If set to {@code false}, an exception is thrown and the sink will not process the
+ *   {@code Event}, causing a Flume {@code Channel} rollback.</td>
  * </tr>
  * <tr>
  *   <td>producer.skipBadColumnValue</td>
  *   <td>false</td>
  *   <td>No</td>
- *   <td>Whether to omit a column value from the row if its raw value cannot be
- *   coerced to the right type, or instead complete abandon the attempt to parse
- *   and insert/operation the row.</td>
+ *   <td>What to do if a value in the pattern match cannot be coerced to the required type.
+ *   If set to {@code true}, a warning message is logged and the operation is still attempted.
+ *   If set to {@code false}, an exception is thrown and the sink will not process the
+ *   {@code Event}, causing a Flume {@code Channel} rollback.</td>
  * </tr>
  * <tr>
  *   <td>producer.warnUnmatchedRows</td>
  *   <td>true</td>
  *   <td>No</td>
- *   <td>Whether to warn about payloads that do not match the pattern. If this
- *   option is not set, event bodies with no matches will be silently dropped.</td>
+ *   <td>Whether to log a warning about payloads that do not match the pattern. If set to
+ *   {@code false}, event bodies with no matches will be silently dropped.</td>
  * </tr>
  * </table>
  *
@@ -119,12 +128,15 @@ import org.apache.kudu.client.Upsert;
 @InterfaceStability.Evolving
 public class RegexpKuduOperationsProducer implements KuduOperationsProducer {
   private static final Logger logger = LoggerFactory.getLogger(RegexpKuduOperationsProducer.class);
+  private static final String INSERT = "insert";
+  private static final String UPSERT = "upsert";
+  private static final List<String> validOperations = Lists.newArrayList(UPSERT, INSERT);
 
   public static final String PATTERN_PROP = "pattern";
   public static final String ENCODING_PROP = "encoding";
   public static final String DEFAULT_ENCODING = "utf-8";
   public static final String OPERATION_PROP = "operation";
-  public static final String DEFAULT_OPERATION = "upsert";
+  public static final String DEFAULT_OPERATION = UPSERT;
   public static final String SKIP_MISSING_COLUMN_PROP = "skipMissingColumn";
   public static final boolean DEFAULT_SKIP_MISSING_COLUMN = false;
   public static final String SKIP_BAD_COLUMN_VALUE_PROP = "skipBadColumnValue";
@@ -132,9 +144,6 @@ public class RegexpKuduOperationsProducer implements KuduOperationsProducer {
   public static final String WARN_UNMATCHED_ROWS_PROP = "skipUnmatchedRows";
   public static final boolean DEFAULT_WARN_UNMATCHED_ROWS = true;
 
-  private static final List<String> validOperations =
-      Lists.newArrayList("upsert", "insert");
-
   private KuduTable table;
   private Pattern pattern;
   private Charset charset;
@@ -165,10 +174,9 @@ public class RegexpKuduOperationsProducer implements KuduOperationsProducer {
       throw new FlumeException(
           String.format("Invalid or unsupported charset %s", charsetName), e);
     }
-    operation = context.getString(OPERATION_PROP,
-        DEFAULT_OPERATION);
+    operation = context.getString(OPERATION_PROP, DEFAULT_OPERATION).toLowerCase();
     Preconditions.checkArgument(
-        validOperations.contains(operation.toLowerCase()),
+        validOperations.contains(operation),
         "Unrecognized operation '%s'",
         operation);
     skipMissingColumn = context.getBoolean(SKIP_MISSING_COLUMN_PROP,
@@ -194,16 +202,16 @@ public class RegexpKuduOperationsProducer implements KuduOperationsProducer {
     while (m.find()) {
       match = true;
       Operation op;
-      switch (operation.toLowerCase()) {
-        case "upsert":
+      switch (operation) {
+        case UPSERT:
           op = table.newUpsert();
           break;
-        case "insert":
+        case INSERT:
           op = table.newInsert();
           break;
         default:
           throw new FlumeException(
-              String.format("Unrecognized operation type '%s' in getOperations: " +
+              String.format("Unrecognized operation type '%s' in getOperations(): " +
                   "this should never happen!", operation));
       }
       PartialRow row = op.getRow();