You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/09/27 21:10:20 UTC
git commit: [SPARK-3680][SQL] Fix bug caused by eager typing of
HiveGenericUDFs
Repository: spark
Updated Branches:
refs/heads/master 080088105 -> f0c7e1955
[SPARK-3680][SQL] Fix bug caused by eager typing of HiveGenericUDFs
Typing of UDFs should be lazy, as it is often not valid to call `dataType` on an expression until after all of its children are `resolved`.
Author: Michael Armbrust <mi...@databricks.com>
Closes #2525 from marmbrus/concatBug and squashes the following commits:
5b8efe7 [Michael Armbrust] fix bug with eager typing of udfs
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f0c7e195
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f0c7e195
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f0c7e195
Branch: refs/heads/master
Commit: f0c7e19550d46f81a0a3ff272bbf66ce4bafead6
Parents: 0800881
Author: Michael Armbrust <mi...@databricks.com>
Authored: Sat Sep 27 12:10:16 2014 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Sat Sep 27 12:10:16 2014 -0700
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/hive/hiveUdfs.scala | 2 +-
.../spark/sql/parquet/ParquetMetastoreSuite.scala | 15 +++++++++++----
2 files changed, 12 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/f0c7e195/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 68944ed..732e497 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -151,7 +151,7 @@ private[hive] case class HiveGenericUdf(functionClassName: String, children: Seq
override def get(): AnyRef = wrap(func())
}
- val dataType: DataType = inspectorToDataType(returnInspector)
+ lazy val dataType: DataType = inspectorToDataType(returnInspector)
override def eval(input: Row): Any = {
returnInspector // Make sure initialized.
http://git-wip-us.apache.org/repos/asf/spark/blob/f0c7e195/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
index e380280..86adbbf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.parquet
import java.io.File
+import org.apache.spark.sql.catalyst.expressions.Row
import org.scalatest.BeforeAndAfterAll
import org.apache.spark.sql.QueryTest
@@ -142,15 +143,21 @@ class ParquetMetastoreSuite extends QueryTest with BeforeAndAfterAll {
test("sum") {
checkAnswer(
sql("SELECT SUM(intField) FROM partitioned_parquet WHERE intField IN (1,2,3) AND p = 1"),
- 1 + 2 + 3
- )
+ 1 + 2 + 3)
+ }
+
+ test("hive udfs") {
+ checkAnswer(
+ sql("SELECT concat(stringField, stringField) FROM partitioned_parquet"),
+ sql("SELECT stringField FROM partitioned_parquet").map {
+ case Row(s: String) => Row(s + s)
+ }.collect().toSeq)
}
test("non-part select(*)") {
checkAnswer(
sql("SELECT COUNT(*) FROM normal_parquet"),
- 10
- )
+ 10)
}
test("conversion is working") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org