You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2019/02/13 17:59:07 UTC
[kudu] 02/03: [Java] Add PartialRow and RowResult getObject API
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 562b5f12f721b533cc2cb868c15a2f85d9e0114b
Author: Grant Henke <gr...@apache.org>
AuthorDate: Thu Feb 7 15:49:07 2019 -0600
[Java] Add PartialRow and RowResult getObject API
This patch adds getObject methods to PartialRow
and RowResult to prevent people from repeating this bit
of code over and over. Also replaces 2 places where this
pattern is used.
Change-Id: I7904a3478896c84fb998d1fddcc2a68c998d4a91
Reviewed-on: http://gerrit.cloudera.org:8080/12393
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Grant Henke <gr...@apache.org>
---
.../org/apache/kudu/backup/KuduBackupRDD.scala | 23 +------
.../java/org/apache/kudu/client/PartialRow.java | 70 ++++++++++++++++++++
.../java/org/apache/kudu/client/RowResult.java | 75 ++++++++++++++++++++--
.../org/apache/kudu/client/TestPartialRow.java | 31 +++++++++
.../java/org/apache/kudu/client/TestRowResult.java | 17 ++++-
.../scala/org/apache/kudu/spark/kudu/KuduRDD.scala | 20 +-----
6 files changed, 188 insertions(+), 48 deletions(-)
diff --git a/java/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala b/java/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala
index 2e45827..a5699d8 100644
--- a/java/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala
+++ b/java/kudu-backup/src/main/scala/org/apache/kudu/backup/KuduBackupRDD.scala
@@ -140,27 +140,6 @@ private class RowIterator(
currentIterator.hasNext
}
- // TODO: Use a more "raw" encoding for efficiency?
- private def get(rowResult: RowResult, i: Int): Any = {
- if (rowResult.isNull(i)) null
- else
- rowResult.getColumnType(i) match {
- case Type.BOOL => rowResult.getBoolean(i)
- case Type.INT8 => rowResult.getByte(i)
- case Type.INT16 => rowResult.getShort(i)
- case Type.INT32 => rowResult.getInt(i)
- case Type.INT64 => rowResult.getLong(i)
- case Type.UNIXTIME_MICROS => rowResult.getTimestamp(i)
- case Type.FLOAT => rowResult.getFloat(i)
- case Type.DOUBLE => rowResult.getDouble(i)
- case Type.STRING => rowResult.getString(i)
- case Type.BINARY => rowResult.getBinaryCopy(i)
- case Type.DECIMAL => rowResult.getDecimal(i)
- case _ =>
- throw new RuntimeException(s"Unsupported column type: ${rowResult.getColumnType(i)}")
- }
- }
-
// TODO: There may be an old KuduRDD implementation where we did some
// sort of zero copy/object pool pattern for performance (we could use that here).
override def next(): Row = {
@@ -168,7 +147,7 @@ private class RowIterator(
val columnCount = rowResult.getColumnProjection.getColumnCount
val columns = Array.ofDim[Any](columnCount)
for (i <- 0 until columnCount) {
- columns(i) = get(rowResult, i)
+ columns(i) = rowResult.getObject(i)
}
Row.fromSeq(columns)
}
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java b/java/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java
index 585d970..c7c64bd 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/PartialRow.java
@@ -890,6 +890,76 @@ public class PartialRow {
}
/**
+ * Get the specified column's value as an Object.
+ *
+ * This method is useful when you don't care about autoboxing
+ * and your existing type handling logic is based on Java types.
+ *
+ * The Object type is based on the column's {@link Type}:
+ * Type.BOOL -> java.lang.Boolean
+ * Type.INT8 -> java.lang.Byte
+ * Type.INT16 -> java.lang.Short
+ * Type.INT32 -> java.lang.Integer
+ * Type.INT64 -> java.lang.Long
+ * Type.UNIXTIME_MICROS -> java.sql.Timestamp
+ * Type.FLOAT -> java.lang.Float
+ * Type.DOUBLE -> java.lang.Double
+ * Type.STRING -> java.lang.String
+ * Type.BINARY -> byte[]
+ * Type.DECIMAL -> java.math.BigDecimal
+ *
+ * @param columnName name of the column in the schema
+ * @return the column's value as an Object, null if the column value is null or unset
+ * @throws IllegalArgumentException if the column name is not in the schema (NOTE(review): raised by Schema#getColumnIndex, confirm)
+ */
+ public Object getObject(String columnName) {
+ return getObject(this.schema.getColumnIndex(columnName)); // resolve name -> index, then reuse the by-index overload
+ }
+
+ /**
+ * Get the specified column's value as an Object.
+ *
+ * This method is useful when you don't care about autoboxing
+ * and your existing type handling logic is based on Java types.
+ *
+ * The Object type is based on the column's {@link Type}:
+ * Type.BOOL -> java.lang.Boolean
+ * Type.INT8 -> java.lang.Byte
+ * Type.INT16 -> java.lang.Short
+ * Type.INT32 -> java.lang.Integer
+ * Type.INT64 -> java.lang.Long
+ * Type.UNIXTIME_MICROS -> java.sql.Timestamp
+ * Type.FLOAT -> java.lang.Float
+ * Type.DOUBLE -> java.lang.Double
+ * Type.STRING -> java.lang.String
+ * Type.BINARY -> byte[]
+ * Type.DECIMAL -> java.math.BigDecimal
+ *
+ * @param columnIndex Column index in the schema
+ * @return the column's value as an Object, null if the column value is null or unset
+ * @throws IndexOutOfBoundsException if the column doesn't exist
+ */
+ public Object getObject(int columnIndex) {
+ checkColumnExists(schema.getColumnByIndex(columnIndex)); // validate the column before reading any value
+ if (isNull(columnIndex) || !isSet(columnIndex)) return null; // explicit null and never-set columns both yield null
+ Type type = schema.getColumnByIndex(columnIndex).getType();
+ switch (type) { // dispatch to the typed getter; primitive results are boxed by the Object return type
+ case BOOL: return getBoolean(columnIndex);
+ case INT8: return getByte(columnIndex);
+ case INT16: return getShort(columnIndex);
+ case INT32: return getInt(columnIndex);
+ case INT64: return getLong(columnIndex);
+ case UNIXTIME_MICROS: return getTimestamp(columnIndex);
+ case FLOAT: return getFloat(columnIndex);
+ case DOUBLE: return getDouble(columnIndex);
+ case STRING: return getString(columnIndex);
+ case BINARY: return getBinaryCopy(columnIndex); // byte[] per the mapping above, not the getBinary form
+ case DECIMAL: return getDecimal(columnIndex);
+ default: throw new UnsupportedOperationException("Unsupported type: " + type); // any Type without a case above
+ }
+ }
+
+ /**
* Verifies if the column exists and belongs to one of the specified types
* @param column column the user wants to set
* @param types types we expect
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java b/java/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java
index ee8b20b..842ffdc 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/RowResult.java
@@ -20,12 +20,7 @@ package org.apache.kudu.client;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
-import java.text.DateFormat;
-import java.text.FieldPosition;
-import java.text.SimpleDateFormat;
import java.util.BitSet;
-import java.util.Date;
-import java.util.TimeZone;
import org.apache.kudu.util.TimestampUtil;
import org.apache.yetus.audience.InterfaceAudience;
@@ -525,6 +520,76 @@ public class RowResult {
}
/**
+ * Get the specified column's value as an Object.
+ *
+ * This method is useful when you don't care about autoboxing
+ * and your existing type handling logic is based on Java types.
+ *
+ * The Object type is based on the column's {@link Type}:
+ * Type.BOOL -> java.lang.Boolean
+ * Type.INT8 -> java.lang.Byte
+ * Type.INT16 -> java.lang.Short
+ * Type.INT32 -> java.lang.Integer
+ * Type.INT64 -> java.lang.Long
+ * Type.UNIXTIME_MICROS -> java.sql.Timestamp
+ * Type.FLOAT -> java.lang.Float
+ * Type.DOUBLE -> java.lang.Double
+ * Type.STRING -> java.lang.String
+ * Type.BINARY -> byte[]
+ * Type.DECIMAL -> java.math.BigDecimal
+ *
+ * @param columnName name of the column in the schema
+ * @return the column's value as an Object, null if the value is null
+ * @throws IllegalArgumentException if the column name is not in the schema (NOTE(review): raised by Schema#getColumnIndex, confirm)
+ */
+ public Object getObject(String columnName) {
+ return getObject(this.schema.getColumnIndex(columnName)); // resolve name -> index, then reuse the by-index overload
+ }
+
+ /**
+ * Get the specified column's value as an Object.
+ *
+ * This method is useful when you don't care about autoboxing
+ * and your existing type handling logic is based on Java types.
+ *
+ * The Object type is based on the column's {@link Type}:
+ * Type.BOOL -> java.lang.Boolean
+ * Type.INT8 -> java.lang.Byte
+ * Type.INT16 -> java.lang.Short
+ * Type.INT32 -> java.lang.Integer
+ * Type.INT64 -> java.lang.Long
+ * Type.UNIXTIME_MICROS -> java.sql.Timestamp
+ * Type.FLOAT -> java.lang.Float
+ * Type.DOUBLE -> java.lang.Double
+ * Type.STRING -> java.lang.String
+ * Type.BINARY -> byte[]
+ * Type.DECIMAL -> java.math.BigDecimal
+ *
+ * @param columnIndex Column index in the schema
+ * @return the column's value as an Object, null if the value is null
+ * @throws IndexOutOfBoundsException if the column doesn't exist
+ */
+ public Object getObject(int columnIndex) {
+ checkValidColumn(columnIndex); // validate the index before reading any value
+ if (isNull(columnIndex)) return null; // a null cell is reported as a null Object
+ Type type = schema.getColumnByIndex(columnIndex).getType();
+ switch (type) { // dispatch to the typed getter; primitive results are boxed by the Object return type
+ case BOOL: return getBoolean(columnIndex);
+ case INT8: return getByte(columnIndex);
+ case INT16: return getShort(columnIndex);
+ case INT32: return getInt(columnIndex);
+ case INT64: return getLong(columnIndex);
+ case UNIXTIME_MICROS: return getTimestamp(columnIndex);
+ case FLOAT: return getFloat(columnIndex);
+ case DOUBLE: return getDouble(columnIndex);
+ case STRING: return getString(columnIndex);
+ case BINARY: return getBinaryCopy(columnIndex); // byte[] per the mapping above
+ case DECIMAL: return getDecimal(columnIndex);
+ default: throw new UnsupportedOperationException("Unsupported type: " + type); // any Type without a case above
+ }
+ }
+
+ /**
* Get the type of a column in this result.
* @param columnName name of the column
* @return a type
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java
index 78f2768..d696e1f 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartialRow.java
@@ -21,6 +21,7 @@ import static org.apache.kudu.test.ClientTestUtil.getSchemaWithAllTypes;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -57,6 +58,36 @@ public class TestPartialRow {
assertEquals(BigDecimal.valueOf(12345, 3), partialRow.getDecimal("decimal"));
}
+ @Test
+ public void testGetObject() { // each column must come back from getObject() with its documented Java type and value
+ PartialRow partialRow = getPartialRowWithAllTypes();
+ assertTrue(partialRow.getObject("bool") instanceof Boolean);
+ assertEquals(true, partialRow.getObject("bool"));
+ assertTrue(partialRow.getObject("int8") instanceof Byte);
+ assertEquals((byte) 42, partialRow.getObject("int8"));
+ assertTrue(partialRow.getObject("int16") instanceof Short);
+ assertEquals((short)43, partialRow.getObject("int16"));
+ assertTrue(partialRow.getObject("int32") instanceof Integer);
+ assertEquals(44, partialRow.getObject("int32"));
+ assertTrue(partialRow.getObject("int64") instanceof Long);
+ assertEquals((long) 45, partialRow.getObject("int64"));
+ assertTrue(partialRow.getObject("timestamp") instanceof Timestamp);
+ assertEquals(new Timestamp(1234567890), partialRow.getObject("timestamp"));
+ assertTrue(partialRow.getObject("float") instanceof Float);
+ assertEquals(52.35F, (float) partialRow.getObject("float"), 0.0f);
+ assertTrue(partialRow.getObject("double") instanceof Double);
+ assertEquals(53.35, (double) partialRow.getObject("double"), 0.0);
+ assertTrue(partialRow.getObject("string") instanceof String);
+ assertEquals("fun with ütf\0", partialRow.getObject("string"));
+ assertTrue(partialRow.getObject("binary-array") instanceof byte[]);
+ assertArrayEquals(new byte[] { 0, 1, 2, 3, 4 }, (byte[]) partialRow.getObject("binary-array")); // check the value through getObject, not getBinaryCopy
+ assertTrue(partialRow.getObject("binary-bytebuffer") instanceof byte[]);
+ assertArrayEquals(new byte[] { 5, 6, 7, 8, 9 }, (byte[]) partialRow.getObject("binary-bytebuffer")); // same: exercise getObject rather than getBinary
+ assertNull(partialRow.getObject("null"));
+ assertTrue(partialRow.getObject("decimal") instanceof BigDecimal);
+ assertEquals(BigDecimal.valueOf(12345, 3), partialRow.getObject("decimal"));
+ }
+
@Test(expected = IllegalArgumentException.class)
public void testGetNullColumn() {
PartialRow partialRow = getPartialRowWithAllTypes();
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java
index 82c5956..7b367e2 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestRowResult.java
@@ -21,6 +21,7 @@ import static org.apache.kudu.test.ClientTestUtil.getAllTypesCreateTableOptions;
import static org.apache.kudu.test.ClientTestUtil.getSchemaWithAllTypes;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.math.BigDecimal;
@@ -86,30 +87,39 @@ public class TestRowResult {
RowResult rr = it.next();
assertEquals((byte) 1, rr.getByte(0));
+ assertEquals((byte) 1, rr.getObject(0));
assertEquals((byte) 1, rr.getByte(allTypesSchema.getColumnByIndex(0).getName()));
assertEquals((short) 2, rr.getShort(1));
+ assertEquals((short) 2, rr.getObject(1));
assertEquals((short) 2, rr.getShort(allTypesSchema.getColumnByIndex(1).getName()));
assertEquals(3, rr.getInt(2));
+ assertEquals(3, rr.getObject(2));
assertEquals(3, rr.getInt(allTypesSchema.getColumnByIndex(2).getName()));
- assertEquals(4, rr.getLong(3));
- assertEquals(4, rr.getLong(allTypesSchema.getColumnByIndex(3).getName()));
+ assertEquals((long) 4, rr.getLong(3));
+ assertEquals((long) 4, rr.getObject(3));
+ assertEquals((long) 4, rr.getLong(allTypesSchema.getColumnByIndex(3).getName()));
assertEquals(true, rr.getBoolean(4));
+ assertEquals(true, rr.getObject(4));
assertEquals(true, rr.getBoolean(allTypesSchema.getColumnByIndex(4).getName()));
assertEquals(5.6f, rr.getFloat(5), .001f);
+ assertEquals(5.6f, (float) rr.getObject(5), .001f);
assertEquals(5.6f, rr.getFloat(allTypesSchema.getColumnByIndex(5).getName()), .001f);
assertEquals(7.8, rr.getDouble(6), .001);
+ assertEquals(7.8, (double) rr.getObject(6), .001);
assertEquals(7.8, rr.getDouble(allTypesSchema.getColumnByIndex(6).getName()), .001f);
assertEquals("string-value", rr.getString(7));
+ assertEquals("string-value", rr.getObject(7));
assertEquals("string-value", rr.getString(allTypesSchema.getColumnByIndex(7).getName()));
assertArrayEquals("binary-array".getBytes(UTF_8), rr.getBinaryCopy(8));
+ assertArrayEquals("binary-array".getBytes(UTF_8), (byte[]) rr.getObject(8));
assertArrayEquals("binary-array".getBytes(UTF_8),
rr.getBinaryCopy(allTypesSchema.getColumnByIndex(8).getName()));
@@ -122,12 +132,15 @@ public class TestRowResult {
assertArrayEquals("bytebuffer".getBytes(UTF_8), rr.getBinaryCopy(9));
assertEquals(true, rr.isNull(10));
+ assertNull(rr.getObject(10));
assertEquals(true, rr.isNull(allTypesSchema.getColumnByIndex(10).getName()));
assertEquals(new Timestamp(11), rr.getTimestamp(11));
+ assertEquals(new Timestamp(11), rr.getObject(11));
assertEquals(new Timestamp(11), rr.getTimestamp(allTypesSchema.getColumnByIndex(11).getName()));
assertEquals(BigDecimal.valueOf(12345, 3), rr.getDecimal(12));
+ assertEquals(BigDecimal.valueOf(12345, 3), rr.getObject(12));
assertEquals(BigDecimal.valueOf(12345, 3), rr.getDecimal(allTypesSchema.getColumnByIndex(12).getName()));
// We test with the column name once since it's the same method for all types, unlike above.
diff --git a/java/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala b/java/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala
index 356c290..78670a4 100644
--- a/java/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala
+++ b/java/kudu-spark/src/main/scala/org/apache/kudu/spark/kudu/KuduRDD.scala
@@ -160,30 +160,12 @@ private class RowIterator(
currentIterator.hasNext
}
- private def get(rowResult: RowResult, i: Int): Any = {
- if (rowResult.isNull(i)) null
- else
- rowResult.getColumnType(i) match {
- case Type.BOOL => rowResult.getBoolean(i)
- case Type.INT8 => rowResult.getByte(i)
- case Type.INT16 => rowResult.getShort(i)
- case Type.INT32 => rowResult.getInt(i)
- case Type.INT64 => rowResult.getLong(i)
- case Type.UNIXTIME_MICROS => rowResult.getTimestamp(i)
- case Type.FLOAT => rowResult.getFloat(i)
- case Type.DOUBLE => rowResult.getDouble(i)
- case Type.STRING => rowResult.getString(i)
- case Type.BINARY => rowResult.getBinaryCopy(i)
- case Type.DECIMAL => rowResult.getDecimal(i)
- }
- }
-
override def next(): Row = {
val rowResult = currentIterator.next()
val columnCount = rowResult.getColumnProjection.getColumnCount
val columns = Array.ofDim[Any](columnCount)
for (i <- 0 until columnCount) {
- columns(i) = get(rowResult, i)
+ columns(i) = rowResult.getObject(i)
}
Row.fromSeq(columns)
}