You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2019/02/13 17:59:07 UTC

[kudu] 02/03: [Java] Add PartialRow and RowResult getObject API

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 562b5f12f721b533cc2cb868c15a2f85d9e0114b
Author: Grant Henke <gr...@apache.org>
AuthorDate: Thu Feb 7 15:49:07 2019 -0600

    [Java] Add PartialRow and RowResult getObject API
    
    This patch adds getObject methods to PartialRow and RowResult so
    that callers no longer have to hand-write the same per-Type dispatch
    to read a column value as an Object. It also replaces the two existing
    copies of that pattern (in KuduBackupRDD and KuduRDD) with the new API.
    
    Change-Id: I7904a3478896c84fb998d1fddcc2a68c998d4a91
    Reviewed-on: http://gerrit.cloudera.org:8080/12393
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Grant Henke <gr...@apache.org>
---
 .../org/apache/kudu/backup/KuduBackupRDD.scala     | 23 +------
 .../java/org/apache/kudu/client/PartialRow.java    | 70 ++++++++++++++++++++
 .../java/org/apache/kudu/client/RowResult.java     | 75 ++++++++++++++++++++--
 .../org/apache/kudu/client/TestPartialRow.java     | 31 +++++++++
 .../java/org/apache/kudu/client/TestRowResult.java | 17 ++++-
 .../scala/org/apache/kudu/spark/kudu/KuduRDD.scala | 20 +-----
 6 files changed, 188 insertions(+), 48 deletions(-)

diff --git a/java/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala b/java/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala
index 2e45827..a5699d8 100644
--- a/java/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala
+++ b/java/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala
@@ -140,27 +140,6 @@ private class RowIterator(
     currentIterator.hasNext
   }
 
-  // TODO: Use a more "raw" encoding for efficiency?
-  private def get(rowResult: RowResult, i: Int): Any = {
-    if (rowResult.isNull(i)) null
-    else
-      rowResult.getColumnType(i) match {
-        case Type.BOOL => rowResult.getBoolean(i)
-        case Type.INT8 => rowResult.getByte(i)
-        case Type.INT16 => rowResult.getShort(i)
-        case Type.INT32 => rowResult.getInt(i)
-        case Type.INT64 => rowResult.getLong(i)
-        case Type.UNIXTIME_MICROS => rowResult.getTimestamp(i)
-        case Type.FLOAT => rowResult.getFloat(i)
-        case Type.DOUBLE => rowResult.getDouble(i)
-        case Type.STRING => rowResult.getString(i)
-        case Type.BINARY => rowResult.getBinaryCopy(i)
-        case Type.DECIMAL => rowResult.getDecimal(i)
-        case _ =>
-          throw new RuntimeException(s"Unsupported column type: ${rowResult.getColumnType(i)}")
-      }
-  }
-
   // TODO: There may be an old KuduRDD implementation where we did some
   // sort of zero copy/object pool pattern for performance (we could use that here).
   override def next(): Row = {
@@ -168,7 +147,7 @@ private class RowIterator(
     val columnCount = rowResult.getColumnProjection.getColumnCount
     val columns = Array.ofDim[Any](columnCount)
     for (i <- 0 until columnCount) {
-      columns(i) = get(rowResult, i)
+      columns(i) = rowResult.getObject(i)
     }
     Row.fromSeq(columns)
   }
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java b/java/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java
index 585d970..c7c64bd 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java
@@ -890,6 +890,76 @@ public class PartialRow {
   }
 
   /**
+   * Get the specified column's value as an Object.
+   *
+   * <p>This method is useful when you don't care about autoboxing
+   * and your existing type handling logic is based on Java types.
+   *
+   * <p>The Object type is based on the column's {@link Type}:<br>
+   *  {@code Type.BOOL -> java.lang.Boolean}<br>
+   *  {@code Type.INT8 -> java.lang.Byte}<br>
+   *  {@code Type.INT16 -> java.lang.Short}<br>
+   *  {@code Type.INT32 -> java.lang.Integer}<br>
+   *  {@code Type.INT64 -> java.lang.Long}<br>
+   *  {@code Type.UNIXTIME_MICROS -> java.sql.Timestamp}<br>
+   *  {@code Type.FLOAT -> java.lang.Float}<br>
+   *  {@code Type.DOUBLE -> java.lang.Double}<br>
+   *  {@code Type.STRING -> java.lang.String}<br>
+   *  {@code Type.BINARY -> byte[]}<br>
+   *  {@code Type.DECIMAL -> java.math.BigDecimal}
+   *
+   * @param columnName name of the column in the schema
+   * @return the column's value as an Object, null if the column value is null or unset
+   * @throws IndexOutOfBoundsException if the column doesn't exist
+   */
+  public Object getObject(String columnName) {
+    return getObject(this.schema.getColumnIndex(columnName));
+  }
+
+  /**
+   * Get the specified column's value as an Object.
+   *
+   * <p>This method is useful when you don't care about autoboxing
+   * and your existing type handling logic is based on Java types.
+   *
+   * <p>The Object type is based on the column's {@link Type}:<br>
+   *  {@code Type.BOOL -> java.lang.Boolean}<br>
+   *  {@code Type.INT8 -> java.lang.Byte}<br>
+   *  {@code Type.INT16 -> java.lang.Short}<br>
+   *  {@code Type.INT32 -> java.lang.Integer}<br>
+   *  {@code Type.INT64 -> java.lang.Long}<br>
+   *  {@code Type.UNIXTIME_MICROS -> java.sql.Timestamp}<br>
+   *  {@code Type.FLOAT -> java.lang.Float}<br>
+   *  {@code Type.DOUBLE -> java.lang.Double}<br>
+   *  {@code Type.STRING -> java.lang.String}<br>
+   *  {@code Type.BINARY -> byte[]}<br>
+   *  {@code Type.DECIMAL -> java.math.BigDecimal}
+   *
+   * @param columnIndex Column index in the schema
+   * @return the column's value as an Object, null if the column value is null or unset
+   * @throws IndexOutOfBoundsException if the column doesn't exist
+   */
+  public Object getObject(int columnIndex) {
+    checkColumnExists(schema.getColumnByIndex(columnIndex));
+    if (isNull(columnIndex) || !isSet(columnIndex)) return null;
+    Type type = schema.getColumnByIndex(columnIndex).getType();
+    switch (type) {
+      case BOOL: return getBoolean(columnIndex);
+      case INT8: return getByte(columnIndex);
+      case INT16: return getShort(columnIndex);
+      case INT32: return getInt(columnIndex);
+      case INT64: return getLong(columnIndex);
+      case UNIXTIME_MICROS: return getTimestamp(columnIndex);
+      case FLOAT: return getFloat(columnIndex);
+      case DOUBLE: return getDouble(columnIndex);
+      case STRING: return getString(columnIndex);
+      case BINARY: return getBinaryCopy(columnIndex);
+      case DECIMAL: return getDecimal(columnIndex);
+      default: throw new UnsupportedOperationException("Unsupported type: " + type);
+    }
+  }
+
+  /**
    * Verifies if the column exists and belongs to one of the specified types
    * @param column column the user wants to set
    * @param types types we expect
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java b/java/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java
index ee8b20b..842ffdc 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java
@@ -20,12 +20,7 @@ package org.apache.kudu.client;
 import java.math.BigDecimal;
 import java.nio.ByteBuffer;
 import java.sql.Timestamp;
-import java.text.DateFormat;
-import java.text.FieldPosition;
-import java.text.SimpleDateFormat;
 import java.util.BitSet;
-import java.util.Date;
-import java.util.TimeZone;
 
 import org.apache.kudu.util.TimestampUtil;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -525,6 +520,76 @@ public class RowResult {
   }
 
   /**
+   * Get the specified column's value as an Object.
+   *
+   * <p>This method is useful when you don't care about autoboxing
+   * and your existing type handling logic is based on Java types.
+   *
+   * <p>The Object type is based on the column's {@link Type}:<br>
+   *  {@code Type.BOOL -> java.lang.Boolean}<br>
+   *  {@code Type.INT8 -> java.lang.Byte}<br>
+   *  {@code Type.INT16 -> java.lang.Short}<br>
+   *  {@code Type.INT32 -> java.lang.Integer}<br>
+   *  {@code Type.INT64 -> java.lang.Long}<br>
+   *  {@code Type.UNIXTIME_MICROS -> java.sql.Timestamp}<br>
+   *  {@code Type.FLOAT -> java.lang.Float}<br>
+   *  {@code Type.DOUBLE -> java.lang.Double}<br>
+   *  {@code Type.STRING -> java.lang.String}<br>
+   *  {@code Type.BINARY -> byte[]}<br>
+   *  {@code Type.DECIMAL -> java.math.BigDecimal}
+   *
+   * @param columnName name of the column in the schema
+   * @return the column's value as an Object, null if the value is null
+   * @throws IndexOutOfBoundsException if the column doesn't exist
+   */
+  public Object getObject(String columnName) {
+    return getObject(this.schema.getColumnIndex(columnName));
+  }
+
+  /**
+   * Get the specified column's value as an Object.
+   *
+   * <p>This method is useful when you don't care about autoboxing
+   * and your existing type handling logic is based on Java types.
+   *
+   * <p>The Object type is based on the column's {@link Type}:<br>
+   *  {@code Type.BOOL -> java.lang.Boolean}<br>
+   *  {@code Type.INT8 -> java.lang.Byte}<br>
+   *  {@code Type.INT16 -> java.lang.Short}<br>
+   *  {@code Type.INT32 -> java.lang.Integer}<br>
+   *  {@code Type.INT64 -> java.lang.Long}<br>
+   *  {@code Type.UNIXTIME_MICROS -> java.sql.Timestamp}<br>
+   *  {@code Type.FLOAT -> java.lang.Float}<br>
+   *  {@code Type.DOUBLE -> java.lang.Double}<br>
+   *  {@code Type.STRING -> java.lang.String}<br>
+   *  {@code Type.BINARY -> byte[]}<br>
+   *  {@code Type.DECIMAL -> java.math.BigDecimal}
+   *
+   * @param columnIndex Column index in the schema
+   * @return the column's value as an Object, null if the value is null
+   * @throws IndexOutOfBoundsException if the column doesn't exist
+   */
+  public Object getObject(int columnIndex) {
+    checkValidColumn(columnIndex);
+    if (isNull(columnIndex)) return null;
+    Type type = schema.getColumnByIndex(columnIndex).getType();
+    switch (type) {
+      case BOOL: return getBoolean(columnIndex);
+      case INT8: return getByte(columnIndex);
+      case INT16: return getShort(columnIndex);
+      case INT32: return getInt(columnIndex);
+      case INT64: return getLong(columnIndex);
+      case UNIXTIME_MICROS: return getTimestamp(columnIndex);
+      case FLOAT: return getFloat(columnIndex);
+      case DOUBLE: return getDouble(columnIndex);
+      case STRING: return getString(columnIndex);
+      case BINARY: return getBinaryCopy(columnIndex);
+      case DECIMAL: return getDecimal(columnIndex);
+      default: throw new UnsupportedOperationException("Unsupported type: " + type);
+    }
+  }
+
+  /**
    * Get the type of a column in this result.
    * @param columnName name of the column
    * @return a type
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java
index 78f2768..d696e1f 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java
@@ -21,6 +21,7 @@ import static org.apache.kudu.test.ClientTestUtil.getSchemaWithAllTypes;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -57,6 +58,36 @@ public class TestPartialRow {
     assertEquals(BigDecimal.valueOf(12345, 3), partialRow.getDecimal("decimal"));
   }
 
+  @Test
+  public void testGetObject() {
+    PartialRow partialRow = getPartialRowWithAllTypes();
+    assertTrue(partialRow.getObject("bool") instanceof Boolean);
+    assertEquals(true, partialRow.getObject("bool"));
+    assertTrue(partialRow.getObject("int8") instanceof Byte);
+    assertEquals((byte) 42, partialRow.getObject("int8"));
+    assertTrue(partialRow.getObject("int16") instanceof Short);
+    assertEquals((short)43, partialRow.getObject("int16"));
+    assertTrue(partialRow.getObject("int32") instanceof Integer);
+    assertEquals(44, partialRow.getObject("int32"));
+    assertTrue(partialRow.getObject("int64") instanceof Long);
+    assertEquals((long) 45, partialRow.getObject("int64"));
+    assertTrue(partialRow.getObject("timestamp") instanceof Timestamp);
+    assertEquals(new Timestamp(1234567890), partialRow.getObject("timestamp"));
+    assertTrue(partialRow.getObject("float") instanceof Float);
+    assertEquals(52.35F, (float) partialRow.getObject("float"), 0.0f);
+    assertTrue(partialRow.getObject("double") instanceof Double);
+    assertEquals(53.35, (double) partialRow.getObject("double"), 0.0);
+    assertTrue(partialRow.getObject("string") instanceof String);
+    assertEquals("fun with ütf\0", partialRow.getObject("string"));
+    assertTrue(partialRow.getObject("binary-array") instanceof byte[]);
+    assertArrayEquals(new byte[] { 0, 1, 2, 3, 4 }, (byte[]) partialRow.getObject("binary-array"));
+    assertTrue(partialRow.getObject("binary-bytebuffer") instanceof byte[]);
+    assertArrayEquals(new byte[] { 5, 6, 7, 8, 9 }, (byte[]) partialRow.getObject("binary-bytebuffer"));
+    assertNull(partialRow.getObject("null"));
+    assertTrue(partialRow.getObject("decimal") instanceof BigDecimal);
+    assertEquals(BigDecimal.valueOf(12345, 3), partialRow.getObject("decimal"));
+  }
+
   @Test(expected = IllegalArgumentException.class)
   public void testGetNullColumn() {
     PartialRow partialRow = getPartialRowWithAllTypes();
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java
index 82c5956..7b367e2 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java
@@ -21,6 +21,7 @@ import static org.apache.kudu.test.ClientTestUtil.getAllTypesCreateTableOptions;
 import static org.apache.kudu.test.ClientTestUtil.getSchemaWithAllTypes;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.math.BigDecimal;
@@ -86,30 +87,39 @@ public class TestRowResult {
       RowResult rr = it.next();
 
       assertEquals((byte) 1, rr.getByte(0));
+      assertEquals((byte) 1, rr.getObject(0));
       assertEquals((byte) 1, rr.getByte(allTypesSchema.getColumnByIndex(0).getName()));
 
       assertEquals((short) 2, rr.getShort(1));
+      assertEquals((short) 2, rr.getObject(1));
       assertEquals((short) 2, rr.getShort(allTypesSchema.getColumnByIndex(1).getName()));
 
       assertEquals(3, rr.getInt(2));
+      assertEquals(3, rr.getObject(2));
       assertEquals(3, rr.getInt(allTypesSchema.getColumnByIndex(2).getName()));
 
-      assertEquals(4, rr.getLong(3));
-      assertEquals(4, rr.getLong(allTypesSchema.getColumnByIndex(3).getName()));
+      assertEquals((long) 4, rr.getLong(3));
+      assertEquals((long) 4, rr.getObject(3));
+      assertEquals((long) 4, rr.getLong(allTypesSchema.getColumnByIndex(3).getName()));
 
       assertEquals(true, rr.getBoolean(4));
+      assertEquals(true, rr.getObject(4));
       assertEquals(true, rr.getBoolean(allTypesSchema.getColumnByIndex(4).getName()));
 
       assertEquals(5.6f, rr.getFloat(5), .001f);
+      assertEquals(5.6f, (float) rr.getObject(5), .001f);
       assertEquals(5.6f, rr.getFloat(allTypesSchema.getColumnByIndex(5).getName()), .001f);
 
       assertEquals(7.8, rr.getDouble(6), .001);
+      assertEquals(7.8, (double) rr.getObject(6), .001);
       assertEquals(7.8, rr.getDouble(allTypesSchema.getColumnByIndex(6).getName()), .001f);
 
       assertEquals("string-value", rr.getString(7));
+      assertEquals("string-value", rr.getObject(7));
       assertEquals("string-value", rr.getString(allTypesSchema.getColumnByIndex(7).getName()));
 
       assertArrayEquals("binary-array".getBytes(UTF_8), rr.getBinaryCopy(8));
+      assertArrayEquals("binary-array".getBytes(UTF_8), (byte[]) rr.getObject(8));
       assertArrayEquals("binary-array".getBytes(UTF_8),
           rr.getBinaryCopy(allTypesSchema.getColumnByIndex(8).getName()));
 
@@ -122,12 +132,15 @@ public class TestRowResult {
       assertArrayEquals("bytebuffer".getBytes(UTF_8), rr.getBinaryCopy(9));
 
       assertEquals(true, rr.isNull(10));
+      assertNull(rr.getObject(10));
       assertEquals(true, rr.isNull(allTypesSchema.getColumnByIndex(10).getName()));
 
       assertEquals(new Timestamp(11), rr.getTimestamp(11));
+      assertEquals(new Timestamp(11), rr.getObject(11));
       assertEquals(new Timestamp(11), rr.getTimestamp(allTypesSchema.getColumnByIndex(11).getName()));
 
       assertEquals(BigDecimal.valueOf(12345, 3), rr.getDecimal(12));
+      assertEquals(BigDecimal.valueOf(12345, 3), rr.getObject(12));
       assertEquals(BigDecimal.valueOf(12345, 3), rr.getDecimal(allTypesSchema.getColumnByIndex(12).getName()));
 
       // We test with the column name once since it's the same method for all types, unlike above.
diff --git a/java/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala b/java/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala
index 356c290..78670a4 100644
--- a/java/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala
+++ b/java/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala
@@ -160,30 +160,12 @@ private class RowIterator(
     currentIterator.hasNext
   }
 
-  private def get(rowResult: RowResult, i: Int): Any = {
-    if (rowResult.isNull(i)) null
-    else
-      rowResult.getColumnType(i) match {
-        case Type.BOOL => rowResult.getBoolean(i)
-        case Type.INT8 => rowResult.getByte(i)
-        case Type.INT16 => rowResult.getShort(i)
-        case Type.INT32 => rowResult.getInt(i)
-        case Type.INT64 => rowResult.getLong(i)
-        case Type.UNIXTIME_MICROS => rowResult.getTimestamp(i)
-        case Type.FLOAT => rowResult.getFloat(i)
-        case Type.DOUBLE => rowResult.getDouble(i)
-        case Type.STRING => rowResult.getString(i)
-        case Type.BINARY => rowResult.getBinaryCopy(i)
-        case Type.DECIMAL => rowResult.getDecimal(i)
-      }
-  }
-
   override def next(): Row = {
     val rowResult = currentIterator.next()
     val columnCount = rowResult.getColumnProjection.getColumnCount
     val columns = Array.ofDim[Any](columnCount)
     for (i <- 0 until columnCount) {
-      columns(i) = get(rowResult, i)
+      columns(i) = rowResult.getObject(i)
     }
     Row.fromSeq(columns)
   }