You are viewing a plain text version of this content. The canonical (HTML) link for it is available in the original archive page; it is not reproduced in this plain-text rendering.
Posted to commits@datafu.apache.org by ey...@apache.org on 2018/12/05 13:59:42 UTC
datafu git commit: Fix tests for Spark 2.3.0
Repository: datafu
Updated Branches:
refs/heads/spark-tmp 8c2d55d8e -> a6d3c5504
Fix tests for Spark 2.3.0
Project: http://git-wip-us.apache.org/repos/asf/datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/datafu/commit/a6d3c550
Tree: http://git-wip-us.apache.org/repos/asf/datafu/tree/a6d3c550
Diff: http://git-wip-us.apache.org/repos/asf/datafu/diff/a6d3c550
Branch: refs/heads/spark-tmp
Commit: a6d3c5504e21befde193b34cf0a00d4212eaa611
Parents: 8c2d55d
Author: Eyal Allweil <ey...@apache.org>
Authored: Wed Dec 5 15:59:17 2018 +0200
Committer: Eyal Allweil <ey...@apache.org>
Committed: Wed Dec 5 15:59:17 2018 +0200
----------------------------------------------------------------------
.gitignore | 2 ++
build.gradle | 4 +++-
.../scala/datafu/spark/TestSparkDFUtils.scala | 18 +++++++++---------
.../test/scala/datafu/spark/TestSparkUDAFs.scala | 17 ++++++++++++++---
4 files changed, 28 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/datafu/blob/a6d3c550/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 09885f6..a4e88b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,5 @@ datafu-pig/query
gradle/wrapper/gradle-wrapper.jar
gradle/wrapper/gradle-wrapper.properties
gradlew*
+datafu-spark/spark-warehouse
+datafu-spark/metastore_db
http://git-wip-us.apache.org/repos/asf/datafu/blob/a6d3c550/build.gradle
----------------------------------------------------------------------
diff --git a/build.gradle b/build.gradle
index d26b980..a219acb 100644
--- a/build.gradle
+++ b/build.gradle
@@ -93,7 +93,9 @@ rat {
'datafu-pig/docs',
'datafu-pig/queries',
'datafu-pig/query',
- 'datafu-spark/derby.log'
+ 'datafu-spark/metastore_db/**',
+ 'datafu-spark/spark-warehouse/**',
+ 'datafu-spark/derby.log'
]
}
http://git-wip-us.apache.org/repos/asf/datafu/blob/a6d3c550/datafu-spark/src/test/scala/datafu/spark/TestSparkDFUtils.scala
----------------------------------------------------------------------
diff --git a/datafu-spark/src/test/scala/datafu/spark/TestSparkDFUtils.scala b/datafu-spark/src/test/scala/datafu/spark/TestSparkDFUtils.scala
index 37a2270..861a1c0 100644
--- a/datafu-spark/src/test/scala/datafu/spark/TestSparkDFUtils.scala
+++ b/datafu-spark/src/test/scala/datafu/spark/TestSparkDFUtils.scala
@@ -150,21 +150,21 @@ class DataFrameOpsTests extends FunSuite with DataFrameSuiteBase {
val notSkewed = sqlContext.createDataFrame((1 to 10).map(i => (i.toString, s"str$i"))).toDF("key", "val")
val expected = sqlContext.createDataFrame(List(
- ("2","str2", "k"),
- ("1","str1", "e"),
- ("1","str1", "d"),
- ("1","str1", "c"),
+ ("1","str1", "a"),
("1","str1", "b"),
- ("1","str1", "a")
+ ("1","str1", "c"),
+ ("1","str1", "d"),
+ ("1","str1", "e"),
+ ("2","str2", "k")
)).toDF("key","val","val_skewed")
val actual1 = notSkewed.broadcastJoinSkewed(skewed,"key", 1)
- assertDataFrameEquals(expected, actual1)
+ assertDataFrameEquals(expected, actual1.sort($"val_skewed"))
val actual2 = notSkewed.broadcastJoinSkewed(skewed,"key", 2)
- assertDataFrameEquals(expected, actual2)
+ assertDataFrameEquals(expected, actual2.sort($"val_skewed"))
}
// because of nulls in expected data, an actual schema needs to be used
@@ -186,7 +186,7 @@ class DataFrameOpsTests extends FunSuite with DataFrameSuiteBase {
("2","k","2","str2")
)).toDF("key","val_skewed","key","val")
- assertDataFrameEquals(expected1, actual1)
+ assertDataFrameEquals(expected1, actual1.sort($"val_skewed")) // assertDataFrameEquals cares about order but we don't
val actual2 = skewed.as("a").joinSkewed(notSkewed.as("b"),expr("a.key = b.key"), 3, "left_outer")
@@ -200,7 +200,7 @@ class DataFrameOpsTests extends FunSuite with DataFrameSuiteBase {
expJoinSkewed("0","k",null,null)
)).toDF("key","val_skewed","key","val")
- assertDataFrameEquals(expected2, actual2)
+ assertDataFrameEquals(expected2, actual2.sort($"val_skewed")) // assertDataFrameEquals cares about order but we don't
}
test("test_changeSchema") {
http://git-wip-us.apache.org/repos/asf/datafu/blob/a6d3c550/datafu-spark/src/test/scala/datafu/spark/TestSparkUDAFs.scala
----------------------------------------------------------------------
diff --git a/datafu-spark/src/test/scala/datafu/spark/TestSparkUDAFs.scala b/datafu-spark/src/test/scala/datafu/spark/TestSparkUDAFs.scala
index 7a1eac9..0b77176 100644
--- a/datafu-spark/src/test/scala/datafu/spark/TestSparkUDAFs.scala
+++ b/datafu-spark/src/test/scala/datafu/spark/TestSparkUDAFs.scala
@@ -27,11 +27,20 @@ import org.junit.runner.RunWith
import org.scalatest.FunSuite
import org.scalatest.junit.JUnitRunner
import org.slf4j.LoggerFactory
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
@RunWith(classOf[JUnitRunner])
class UdafTests extends FunSuite with DataFrameSuiteBase {
import spark.implicits._
+
+ /**
+ * taken from https://github.com/holdenk/spark-testing-base/issues/234#issuecomment-390150835
+ *
+ * Solves problem with Hive in Spark 2.3.0 in spark-testing-base
+ */
+ override def conf: SparkConf = super.conf.set(CATALOG_IMPLEMENTATION.key, "hive")
val logger = LoggerFactory.getLogger(this.getClass)
@@ -64,9 +73,11 @@ class UdafTests extends FunSuite with DataFrameSuiteBase {
spark.sql("insert into table mas_table select case when 1=2 then array('asd') end from (select 1)z")
spark.sql("insert into table mas_table select case when 1=2 then array('asd') end from (select 1)z")
- assertDataFrameEquals(
- sqlContext.createDataFrame(List(mapExp(Map()))),
- spark.table("mas_table").groupBy().agg(mas($"arr").as("map_col")))
+ val expected = sqlContext.createDataFrame(List(mapExp(Map())))
+
+ val actual = spark.table("mas_table").groupBy().agg(mas($"arr").as("map_col"))
+
+ assertDataFrameEquals(expected, actual)
}
test("test multiarrayset max keys") {