Posted to commits@spark.apache.org by rx...@apache.org on 2014/04/25 03:21:04 UTC

git commit: [SQL] Add support for parsing indexing into arrays in SQL.

Repository: spark
Updated Branches:
  refs/heads/master 526a518bf -> 4660991e6


[SQL] Add support for parsing indexing into arrays in SQL.

Author: Michael Armbrust <mi...@databricks.com>

Closes #518 from marmbrus/parseArrayIndex and squashes the following commits:

afd2d6b [Michael Armbrust] 100 chars
c3d6026 [Michael Armbrust] Add support for parsing indexing into arrays in SQL.
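
For readers of the archive, here is an illustrative sketch of the syntax this commit enables, run against the arrayData fixture it adds to TestData.scala below. This is a sketch only: it assumes a 2014-era spark-shell session where sc is the active SparkContext, and the commented output is approximate.

    // Sketch only; sc is assumed to be the SparkContext from spark-shell.
    import org.apache.spark.sql.SQLContext

    val sqlContext = new SQLContext(sc)
    import sqlContext._  // createSchemaRDD implicit and sql(...)

    case class ArrayData(data: Seq[Int], nestedData: Seq[Seq[Int]])
    val rdd = sc.parallelize(
      ArrayData(Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: Nil)
    rdd.registerAsTable("arrayData")

    // Indices are zero-based and may themselves be expressions.
    sql("SELECT data[0], data[0 + 1], nestedData[0][2] FROM arrayData")
      .collect()  // one row containing 1, 2, 3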


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4660991e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4660991e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4660991e

Branch: refs/heads/master
Commit: 4660991e679eda158a3ae8039b686eae197a71d1
Parents: 526a518
Author: Michael Armbrust <mi...@databricks.com>
Authored: Thu Apr 24 18:21:00 2014 -0700
Committer: Reynold Xin <rx...@apache.org>
Committed: Thu Apr 24 18:21:00 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/SqlParser.scala   | 10 +++++++---
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala  | 16 ++++++++++++++++
 .../test/scala/org/apache/spark/sql/TestData.scala  |  7 +++++++
 3 files changed, 30 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4660991e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 13a19d0..8c76a3a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst
 import scala.language.implicitConversions
 import scala.util.parsing.combinator.lexical.StdLexical
 import scala.util.parsing.combinator.syntactical.StandardTokenParsers
+import scala.util.parsing.combinator.PackratParsers
 import scala.util.parsing.input.CharArrayReader.EofCh
 
 import org.apache.spark.sql.catalyst.analysis._
@@ -39,7 +40,7 @@ import org.apache.spark.sql.catalyst.types._
  * This is currently included mostly for illustrative purposes.  Users wanting more complete support
  * for a SQL-like language should check out the HiveQL support in the sql/hive sub-project.
  */
-class SqlParser extends StandardTokenParsers {
+class SqlParser extends StandardTokenParsers with PackratParsers {
   def apply(input: String): LogicalPlan = {
     phrase(query)(new lexical.Scanner(input)) match {
       case Success(r, x) => r
@@ -152,7 +153,7 @@ class SqlParser extends StandardTokenParsers {
 
   lexical.delimiters += (
     "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
-    ",", ";", "%", "{", "}", ":"
+    ",", ";", "%", "{", "}", ":", "[", "]"
   )
 
   protected def assignAliases(exprs: Seq[Expression]): Seq[NamedExpression] = {
@@ -339,7 +340,10 @@ class SqlParser extends StandardTokenParsers {
   protected lazy val floatLit: Parser[String] =
     elem("decimal", _.isInstanceOf[lexical.FloatLit]) ^^ (_.chars)
 
-  protected lazy val baseExpression: Parser[Expression] =
+  protected lazy val baseExpression: PackratParser[Expression] =
+    expression ~ "[" ~ expression <~ "]" ^^ {
+      case base ~ _ ~ ordinal => GetItem(base, ordinal)
+    } |
     TRUE ^^^ Literal(true, BooleanType) |
     FALSE ^^^ Literal(false, BooleanType) |
     cast |
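
Why the PackratParsers mixin is needed: the new first alternative of baseExpression starts with expression, and expression eventually bottoms out in baseExpression again, so the grammar is now left-recursive. A plain StandardTokenParsers grammar would recurse forever on such a rule; PackratParsers adds memoization and left-recursion handling, provided the rule is declared as a PackratParser (hence the type change above). A standalone sketch of the same idea, not Spark code and with illustrative names:

    // A left-recursive indexing grammar that terminates only because
    // PackratParsers memoizes results and resolves the left recursion.
    import scala.util.parsing.combinator.{JavaTokenParsers, PackratParsers}

    object IndexParser extends JavaTokenParsers with PackratParsers {
      sealed trait Ast
      case class Num(n: Int) extends Ast
      case class Index(base: Ast, ordinal: Ast) extends Ast

      // Left-recursive: expr is the first symbol of its own alternative,
      // mirroring expression ~ "[" ~ expression <~ "]" in SqlParser.
      lazy val expr: PackratParser[Ast] =
        expr ~ ("[" ~> expr <~ "]") ^^ { case b ~ o => Index(b, o) } |
        wholeNumber ^^ (s => Num(s.toInt))

      def apply(in: String): Ast = parseAll(expr, in).get
    }

    // IndexParser("1[2][3]") == Index(Index(Num(1), Num(2)), Num(3)):
    // chained brackets associate to the left, as in nestedData[0][0].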

http://git-wip-us.apache.org/repos/asf/spark/blob/4660991e/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 4c4fd6d..dde957d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -28,6 +28,22 @@ class SQLQuerySuite extends QueryTest {
   // Make sure the tables are loaded.
   TestData
 
+  test("index into array") {
+    checkAnswer(
+      sql("SELECT data, data[0], data[0] + data[1], data[0 + 1] FROM arrayData"),
+      arrayData.map(d => (d.data, d.data(0), d.data(0) + d.data(1), d.data(1))).collect().toSeq)
+  }
+
+  test("index into array of arrays") {
+    checkAnswer(
+      sql(
+        "SELECT nestedData, nestedData[0][0], nestedData[0][0] + nestedData[0][1] FROM arrayData"),
+      arrayData.map(d =>
+        (d.nestedData,
+         d.nestedData(0)(0),
+         d.nestedData(0)(0) + d.nestedData(0)(1))).collect().toSeq)
+  }
+
   test("agg") {
     checkAnswer(
       sql("SELECT a, SUM(b) FROM testData2 GROUP BY a"),
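
For reference, a sketch of what the parser now produces for one of these queries. GetItem is the pre-existing catalyst expression that the new rule constructs; the plan shape described in the comments is approximate.

    // Feed one of the test queries straight to the new parser.
    // Assumes spark-catalyst on the classpath; output is illustrative.
    import org.apache.spark.sql.catalyst.SqlParser

    val parser = new SqlParser()
    val plan = parser("SELECT data[0 + 1] FROM arrayData")
    // The projection contains roughly GetItem('data, Add(Literal(0), Literal(1))):
    // the bracketed index is a full expression, not just an integer literal.
    println(plan)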

http://git-wip-us.apache.org/repos/asf/spark/blob/4660991e/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
index 271b1d9..002b7f0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
@@ -66,4 +66,11 @@ object TestData {
       LowerCaseData(3, "c") ::
       LowerCaseData(4, "d") :: Nil)
   lowerCaseData.registerAsTable("lowerCaseData")
+
+  case class ArrayData(data: Seq[Int], nestedData: Seq[Seq[Int]])
+  val arrayData =
+    TestSQLContext.sparkContext.parallelize(
+      ArrayData(Seq(1,2,3), Seq(Seq(1,2,3))) ::
+      ArrayData(Seq(2,3,4), Seq(Seq(2,3,4))) :: Nil)
+  arrayData.registerAsTable("arrayData")
 }
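
A note on the fixture: the case class's Seq[Int] and Seq[Seq[Int]] fields become array-typed columns through the usual reflection-based schema inference, so once TestData has been referenced, the table is queryable from the sql/core test scope. A hedged sketch, assuming the sql/core test classpath (e.g. sbt test:console):

    // TestData and TestSQLContext are test classes in sql/core.
    import org.apache.spark.sql.TestData
    import org.apache.spark.sql.test.TestSQLContext._

    TestData  // referencing the object runs its table registrations
    sql("SELECT nestedData[0][1] FROM arrayData").collect()
    // => 2 and 3: the second element of each row's first inner array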