You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by da...@apache.org on 2015/06/17 18:01:03 UTC
spark git commit: [SPARK-7199] [SQL] Add date and timestamp support
to UnsafeRow
Repository: spark
Updated Branches:
refs/heads/master c13da20a5 -> 104f30c36
[SPARK-7199] [SQL] Add date and timestamp support to UnsafeRow
JIRA: https://issues.apache.org/jira/browse/SPARK-7199
Author: Liang-Chi Hsieh <vi...@gmail.com>
Closes #5984 from viirya/add_date_timestamp and squashes the following commits:
7f21ce9 [Liang-Chi Hsieh] For comment.
0b89698 [Liang-Chi Hsieh] Add timestamp to settableFieldTypes.
c30d490 [Liang-Chi Hsieh] Use default IntUnsafeColumnWriter and LongUnsafeColumnWriter.
672ef17 [Liang-Chi Hsieh] Remove getter/setter for Date and Timestamp and use Int and Long for them.
9f3e577 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_date_timestamp
281e844 [Liang-Chi Hsieh] Fix scala style.
fb532b5 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_date_timestamp
80af342 [Liang-Chi Hsieh] Fix compiling error.
f4f5de6 [Liang-Chi Hsieh] Fix scala style.
a463e83 [Liang-Chi Hsieh] Use Long to store timestamp for rows.
635388a [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_date_timestamp
46946c6 [Liang-Chi Hsieh] Adapt for moved DateUtils.
b16994e [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into add_date_timestamp
752251f [Liang-Chi Hsieh] Support setDate. Fix failed test.
fcf8db9 [Liang-Chi Hsieh] Add functions for Date and Timestamp to SpecificRow.
e42a809 [Liang-Chi Hsieh] Fix style.
4c07b57 [Liang-Chi Hsieh] Add date and timestamp support to UnsafeRow.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/104f30c3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/104f30c3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/104f30c3
Branch: refs/heads/master
Commit: 104f30c36f3d44b7567f6f77adb92e0a96494541
Parents: c13da20
Author: Liang-Chi Hsieh <vi...@gmail.com>
Authored: Wed Jun 17 09:00:37 2015 -0700
Committer: Davies Liu <da...@databricks.com>
Committed: Wed Jun 17 09:00:37 2015 -0700
----------------------------------------------------------------------
.../sql/catalyst/expressions/UnsafeRow.java | 6 ++--
.../expressions/UnsafeRowConverter.scala | 4 +++
.../spark/sql/catalyst/expressions/rows.scala | 3 +-
.../expressions/UnsafeRowConverterSuite.scala | 30 ++++++++++++++++++++
4 files changed, 39 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/104f30c3/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index aec88c9..c4b7f84 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -103,7 +103,9 @@ public final class UnsafeRow extends BaseMutableRow {
IntegerType,
LongType,
FloatType,
- DoubleType
+ DoubleType,
+ DateType,
+ TimestampType
})));
// We support get() on a superset of the types for which we support set():
@@ -331,8 +333,6 @@ public final class UnsafeRow extends BaseMutableRow {
return getUTF8String(i).toString();
}
-
-
@Override
public InternalRow copy() {
throw new UnsupportedOperationException();
http://git-wip-us.apache.org/repos/asf/spark/blob/104f30c3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverter.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverter.scala
index 5c92f41..72f740e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverter.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.catalyst.expressions
+import org.apache.spark.sql.catalyst.util.DateUtils
+import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.PlatformDependent
import org.apache.spark.unsafe.array.ByteArrayMethods
@@ -120,6 +122,8 @@ private object UnsafeColumnWriter {
case FloatType => FloatUnsafeColumnWriter
case DoubleType => DoubleUnsafeColumnWriter
case StringType => StringUnsafeColumnWriter
+ case DateType => IntUnsafeColumnWriter
+ case TimestampType => LongUnsafeColumnWriter
case t =>
throw new UnsupportedOperationException(s"Do not know how to write columns of type $t")
}
http://git-wip-us.apache.org/repos/asf/spark/blob/104f30c3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index 534dac1..1098962 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -197,9 +197,10 @@ class GenericMutableRow(v: Array[Any]) extends GenericRow(v) with MutableRow {
override def setFloat(ordinal: Int, value: Float): Unit = { values(ordinal) = value }
override def setInt(ordinal: Int, value: Int): Unit = { values(ordinal) = value }
override def setLong(ordinal: Int, value: Long): Unit = { values(ordinal) = value }
- override def setString(ordinal: Int, value: String) {
+ override def setString(ordinal: Int, value: String): Unit = {
values(ordinal) = UTF8String.fromString(value)
}
+
override def setNullAt(i: Int): Unit = { values(i) = null }
override def setShort(ordinal: Int, value: Short): Unit = { values(ordinal) = value }
http://git-wip-us.apache.org/repos/asf/spark/blob/104f30c3/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala
index 577c7a0..721ef8a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala
@@ -17,12 +17,14 @@
package org.apache.spark.sql.catalyst.expressions
+import java.sql.{Date, Timestamp}
import java.util.Arrays
import org.scalatest.Matchers
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types._
+import org.apache.spark.sql.catalyst.util.DateUtils
import org.apache.spark.unsafe.PlatformDependent
import org.apache.spark.unsafe.array.ByteArrayMethods
@@ -74,6 +76,34 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers {
unsafeRow.getString(2) should be ("World")
}
+ test("basic conversion with primitive, string, date and timestamp types") {
+ val fieldTypes: Array[DataType] = Array(LongType, StringType, DateType, TimestampType)
+ val converter = new UnsafeRowConverter(fieldTypes)
+
+ val row = new SpecificMutableRow(fieldTypes)
+ row.setLong(0, 0)
+ row.setString(1, "Hello")
+ row.update(2, DateUtils.fromJavaDate(Date.valueOf("1970-01-01")))
+ row.update(3, DateUtils.fromJavaTimestamp(Timestamp.valueOf("2015-05-08 08:10:25")))
+
+ val sizeRequired: Int = converter.getSizeRequirement(row)
+ sizeRequired should be (8 + (8 * 4) +
+ ByteArrayMethods.roundNumberOfBytesToNearestWord("Hello".getBytes.length + 8))
+ val buffer: Array[Long] = new Array[Long](sizeRequired / 8)
+ val numBytesWritten = converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET)
+ numBytesWritten should be (sizeRequired)
+
+ val unsafeRow = new UnsafeRow()
+ unsafeRow.pointTo(buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, null)
+ unsafeRow.getLong(0) should be (0)
+ unsafeRow.getString(1) should be ("Hello")
+ // Date is represented as Int in unsafeRow
+ DateUtils.toJavaDate(unsafeRow.getInt(2)) should be (Date.valueOf("1970-01-01"))
+ // Timestamp is represented as Long in unsafeRow
+ DateUtils.toJavaTimestamp(unsafeRow.getLong(3)) should be
+ (Timestamp.valueOf("2015-05-08 08:10:25"))
+ }
+
test("null handling") {
val fieldTypes: Array[DataType] = Array(
NullType,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org