You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/08/20 01:14:48 UTC

spark git commit: [SPARK-17149][SQL] array.sql for testing array related functions

Repository: spark
Updated Branches:
  refs/heads/master acac7a508 -> a117afa7c


[SPARK-17149][SQL] array.sql for testing array related functions

## What changes were proposed in this pull request?
This patch creates array.sql in SQLQueryTestSuite for testing array-related functions, including:

- indexing
- array creation
- size
- array_contains
- sort_array

## How was this patch tested?
The patch itself is about adding tests.

Author: petermaxlee <pe...@gmail.com>

Closes #14708 from petermaxlee/SPARK-17149.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a117afa7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a117afa7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a117afa7

Branch: refs/heads/master
Commit: a117afa7c2d94f943106542ec53d74ba2b5f1058
Parents: acac7a5
Author: petermaxlee <pe...@gmail.com>
Authored: Fri Aug 19 18:14:45 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Fri Aug 19 18:14:45 2016 -0700

----------------------------------------------------------------------
 .../catalyst/analysis/FunctionRegistry.scala    |  12 +-
 .../test/resources/sql-tests/inputs/array.sql   |  86 +++++++++++
 .../resources/sql-tests/results/array.sql.out   | 144 +++++++++++++++++++
 .../org/apache/spark/sql/SQLQuerySuite.scala    |  16 ---
 .../apache/spark/sql/SQLQueryTestSuite.scala    |  10 ++
 .../hive/execution/HiveCompatibilitySuite.scala |   4 +-
 .../sql/hive/execution/HiveQuerySuite.scala     |   9 --
 7 files changed, 248 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a117afa7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index c5f91c1..35fd800 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -161,7 +161,6 @@ object FunctionRegistry {
   val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map(
     // misc non-aggregate functions
     expression[Abs]("abs"),
-    expression[CreateArray]("array"),
     expression[Coalesce]("coalesce"),
     expression[Explode]("explode"),
     expression[Greatest]("greatest"),
@@ -172,10 +171,6 @@ object FunctionRegistry {
     expression[IsNull]("isnull"),
     expression[IsNotNull]("isnotnull"),
     expression[Least]("least"),
-    expression[CreateMap]("map"),
-    expression[MapKeys]("map_keys"),
-    expression[MapValues]("map_values"),
-    expression[CreateNamedStruct]("named_struct"),
     expression[NaNvl]("nanvl"),
     expression[NullIf]("nullif"),
     expression[Nvl]("nvl"),
@@ -184,7 +179,6 @@ object FunctionRegistry {
     expression[Rand]("rand"),
     expression[Randn]("randn"),
     expression[Stack]("stack"),
-    expression[CreateStruct]("struct"),
     expression[CaseWhen]("when"),
 
     // math functions
@@ -354,9 +348,15 @@ object FunctionRegistry {
     expression[TimeWindow]("window"),
 
     // collection functions
+    expression[CreateArray]("array"),
     expression[ArrayContains]("array_contains"),
+    expression[CreateMap]("map"),
+    expression[CreateNamedStruct]("named_struct"),
+    expression[MapKeys]("map_keys"),
+    expression[MapValues]("map_values"),
     expression[Size]("size"),
     expression[SortArray]("sort_array"),
+    expression[CreateStruct]("struct"),
 
     // misc functions
     expression[AssertTrue]("assert_true"),

http://git-wip-us.apache.org/repos/asf/spark/blob/a117afa7/sql/core/src/test/resources/sql-tests/inputs/array.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql
new file mode 100644
index 0000000..4038a0d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql
@@ -0,0 +1,86 @@
+-- test cases for array functions
+
+create temporary view data as select * from values
+  ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))),
+  ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223)))
+  as data(a, b, c);
+
+select * from data;
+
+-- index into array
+select a, b[0], b[0] + b[1] from data;
+
+-- index into array of arrays
+select a, c[0][0] + c[0][0 + 1] from data;
+
+
+create temporary view primitive_arrays as select * from values (
+  array(true),
+  array(2Y, 1Y),
+  array(2S, 1S),
+  array(2, 1),
+  array(2L, 1L),
+  array(9223372036854775809, 9223372036854775808),
+  array(2.0D, 1.0D),
+  array(float(2.0), float(1.0)),
+  array(date '2016-03-14', date '2016-03-13'),
+  array(timestamp '2016-11-15 20:54:00.000',  timestamp '2016-11-12 20:54:00.000')
+) as primitive_arrays(
+  boolean_array,
+  tinyint_array,
+  smallint_array,
+  int_array,
+  bigint_array,
+  decimal_array,
+  double_array,
+  float_array,
+  date_array,
+  timestamp_array
+);
+
+select * from primitive_arrays;
+
+-- array_contains on all primitive types: result should alternate between true and false
+select
+  array_contains(boolean_array, true), array_contains(boolean_array, false),
+  array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y),
+  array_contains(smallint_array, 2S), array_contains(smallint_array, 0S),
+  array_contains(int_array, 2), array_contains(int_array, 0),
+  array_contains(bigint_array, 2L), array_contains(bigint_array, 0L),
+  array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1),
+  array_contains(double_array, 2.0D), array_contains(double_array, 0.0D),
+  array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)),
+  array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'),
+  array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000')
+from primitive_arrays;
+
+-- array_contains on nested arrays
+select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data;
+
+-- sort_array
+select
+  sort_array(boolean_array),
+  sort_array(tinyint_array),
+  sort_array(smallint_array),
+  sort_array(int_array),
+  sort_array(bigint_array),
+  sort_array(decimal_array),
+  sort_array(double_array),
+  sort_array(float_array),
+  sort_array(date_array),
+  sort_array(timestamp_array)
+from primitive_arrays;
+
+-- size
+select
+  size(boolean_array),
+  size(tinyint_array),
+  size(smallint_array),
+  size(int_array),
+  size(bigint_array),
+  size(decimal_array),
+  size(double_array),
+  size(float_array),
+  size(date_array),
+  size(timestamp_array)
+from primitive_arrays;

http://git-wip-us.apache.org/repos/asf/spark/blob/a117afa7/sql/core/src/test/resources/sql-tests/results/array.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out
new file mode 100644
index 0000000..4a1d149
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out
@@ -0,0 +1,144 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 10
+
+
+-- !query 0
+create temporary view data as select * from values
+  ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))),
+  ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223)))
+  as data(a, b, c)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+select * from data
+-- !query 1 schema
+struct<a:string,b:array<int>,c:array<array<int>>>
+-- !query 1 output
+one	[11,12,13]	[[111,112,113],[121,122,123]]
+two	[21,22,23]	[[211,212,213],[221,222,223]]
+
+
+-- !query 2
+select a, b[0], b[0] + b[1] from data
+-- !query 2 schema
+struct<a:string,b[0]:int,(b[0] + b[1]):int>
+-- !query 2 output
+one	11	23
+two	21	43
+
+
+-- !query 3
+select a, c[0][0] + c[0][0 + 1] from data
+-- !query 3 schema
+struct<a:string,(c[0][0] + c[0][(0 + 1)]):int>
+-- !query 3 output
+one	223
+two	423
+
+
+-- !query 4
+create temporary view primitive_arrays as select * from values (
+  array(true),
+  array(2Y, 1Y),
+  array(2S, 1S),
+  array(2, 1),
+  array(2L, 1L),
+  array(9223372036854775809, 9223372036854775808),
+  array(2.0D, 1.0D),
+  array(float(2.0), float(1.0)),
+  array(date '2016-03-14', date '2016-03-13'),
+  array(timestamp '2016-11-15 20:54:00.000',  timestamp '2016-11-12 20:54:00.000')
+) as primitive_arrays(
+  boolean_array,
+  tinyint_array,
+  smallint_array,
+  int_array,
+  bigint_array,
+  decimal_array,
+  double_array,
+  float_array,
+  date_array,
+  timestamp_array
+)
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+select * from primitive_arrays
+-- !query 5 schema
+struct<boolean_array:array<boolean>,tinyint_array:array<tinyint>,smallint_array:array<smallint>,int_array:array<int>,bigint_array:array<bigint>,decimal_array:array<decimal(19,0)>,double_array:array<double>,float_array:array<float>,date_array:array<date>,timestamp_array:array<timestamp>>
+-- !query 5 output
+[true]	[2,1]	[2,1]	[2,1]	[2,1]	[9223372036854775809,9223372036854775808]	[2.0,1.0]	[2.0,1.0]	[2016-03-14,2016-03-13]	[2016-11-15 20:54:00.0,2016-11-12 20:54:00.0]
+
+
+-- !query 6
+select
+  array_contains(boolean_array, true), array_contains(boolean_array, false),
+  array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y),
+  array_contains(smallint_array, 2S), array_contains(smallint_array, 0S),
+  array_contains(int_array, 2), array_contains(int_array, 0),
+  array_contains(bigint_array, 2L), array_contains(bigint_array, 0L),
+  array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1),
+  array_contains(double_array, 2.0D), array_contains(double_array, 0.0D),
+  array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)),
+  array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'),
+  array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000')
+from primitive_arrays
+-- !query 6 schema
+struct<array_contains(boolean_array, true):boolean,array_contains(boolean_array, false):boolean,array_contains(tinyint_array, 2):boolean,array_contains(tinyint_array, 0):boolean,array_contains(smallint_array, 2):boolean,array_contains(smallint_array, 0):boolean,array_contains(int_array, 2):boolean,array_contains(int_array, 0):boolean,array_contains(bigint_array, 2):boolean,array_contains(bigint_array, 0):boolean,array_contains(decimal_array, 9223372036854775809):boolean,array_contains(decimal_array, CAST(1 AS DECIMAL(19,0))):boolean,array_contains(double_array, 2.0):boolean,array_contains(double_array, 0.0):boolean,array_contains(float_array, CAST(2.0 AS FLOAT)):boolean,array_contains(float_array, CAST(0.0 AS FLOAT)):boolean,array_contains(date_array, DATE '2016-03-14'):boolean,array_contains(date_array, DATE '2016-01-01'):boolean,array_contains(timestamp_array, TIMESTAMP('2016-11-15 20:54:00.0')):boolean,array_contains(timestamp_array, TIMESTAMP('2016-01-01 20:54:00.0')):boolean>
+-- !query 6 output
+true	false	true	false	true	false	true	false	true	false	true	false	true	false	true	false	true	false	true	false
+
+
+-- !query 7
+select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data
+-- !query 7 schema
+struct<array_contains(b, 11):boolean,array_contains(c, array(111, 112, 113)):boolean>
+-- !query 7 output
+false	false
+true	true
+
+
+-- !query 8
+select
+  sort_array(boolean_array),
+  sort_array(tinyint_array),
+  sort_array(smallint_array),
+  sort_array(int_array),
+  sort_array(bigint_array),
+  sort_array(decimal_array),
+  sort_array(double_array),
+  sort_array(float_array),
+  sort_array(date_array),
+  sort_array(timestamp_array)
+from primitive_arrays
+-- !query 8 schema
+struct<sort_array(boolean_array, true):array<boolean>,sort_array(tinyint_array, true):array<tinyint>,sort_array(smallint_array, true):array<smallint>,sort_array(int_array, true):array<int>,sort_array(bigint_array, true):array<bigint>,sort_array(decimal_array, true):array<decimal(19,0)>,sort_array(double_array, true):array<double>,sort_array(float_array, true):array<float>,sort_array(date_array, true):array<date>,sort_array(timestamp_array, true):array<timestamp>>
+-- !query 8 output
+[true]	[1,2]	[1,2]	[1,2]	[1,2]	[9223372036854775808,9223372036854775809]	[1.0,2.0]	[1.0,2.0]	[2016-03-13,2016-03-14]	[2016-11-12 20:54:00.0,2016-11-15 20:54:00.0]
+
+
+-- !query 9
+select
+  size(boolean_array),
+  size(tinyint_array),
+  size(smallint_array),
+  size(int_array),
+  size(bigint_array),
+  size(decimal_array),
+  size(double_array),
+  size(float_array),
+  size(date_array),
+  size(timestamp_array)
+from primitive_arrays
+-- !query 9 schema
+struct<size(boolean_array):int,size(tinyint_array):int,size(smallint_array):int,size(int_array):int,size(bigint_array):int,size(decimal_array):int,size(double_array):int,size(float_array):int,size(date_array):int,size(timestamp_array):int>
+-- !query 9 output
+1	2	2	2	2	2	2	2	2	2

http://git-wip-us.apache.org/repos/asf/spark/blob/a117afa7/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 4fcde58..eac266c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -445,12 +445,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       Nil)
   }
 
-  test("index into array") {
-    checkAnswer(
-      sql("SELECT data, data[0], data[0] + data[1], data[0 + 1] FROM arrayData"),
-      arrayData.map(d => Row(d.data, d.data(0), d.data(0) + d.data(1), d.data(1))).collect())
-  }
-
   test("left semi greater than predicate") {
     withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
       checkAnswer(
@@ -472,16 +466,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     )
   }
 
-  test("index into array of arrays") {
-    checkAnswer(
-      sql(
-        "SELECT nestedData, nestedData[0][0], nestedData[0][0] + nestedData[0][1] FROM arrayData"),
-      arrayData.map(d =>
-        Row(d.nestedData,
-         d.nestedData(0)(0),
-         d.nestedData(0)(0) + d.nestedData(0)(1))).collect().toSeq)
-  }
-
   test("agg") {
     checkAnswer(
       sql("SELECT a, SUM(b) FROM testData2 GROUP BY a"),

http://git-wip-us.apache.org/repos/asf/spark/blob/a117afa7/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 069a9b6..55d5a56 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -35,6 +35,16 @@ import org.apache.spark.sql.types.StructType
  * Each case is loaded from a file in "spark/sql/core/src/test/resources/sql-tests/inputs".
  * Each case has a golden result file in "spark/sql/core/src/test/resources/sql-tests/results".
  *
+ * To run the entire test suite:
+ * {{{
+ *   build/sbt "sql/test-only *SQLQueryTestSuite"
+ * }}}
+ *
+ * To run a single test file upon change:
+ * {{{
+ *   build/sbt "~sql/test-only *SQLQueryTestSuite -- -z inline-table.sql"
+ * }}}
+ *
  * To re-generate golden files, run:
  * {{{
  *   SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite"

http://git-wip-us.apache.org/repos/asf/spark/blob/a117afa7/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 13d18fd..a54d234 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -979,8 +979,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "udf_PI",
     "udf_acos",
     "udf_add",
-    "udf_array",
-    "udf_array_contains",
+    // "udf_array",  -- done in array.sql
+    // "udf_array_contains",  -- done in array.sql
     "udf_ascii",
     "udf_asin",
     "udf_atan",

http://git-wip-us.apache.org/repos/asf/spark/blob/a117afa7/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 6785167..3c7dbb4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -216,15 +216,6 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
     assert(new Timestamp(1000) == r1.getTimestamp(0))
   }
 
-  createQueryTest("constant array",
-  """
-    |SELECT sort_array(
-    |  sort_array(
-    |    array("hadoop distributed file system",
-    |          "enterprise databases", "hadoop map-reduce")))
-    |FROM src LIMIT 1;
-  """.stripMargin)
-
   createQueryTest("null case",
     "SELECT case when(true) then 1 else null end FROM src LIMIT 1")
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org