Posted to commits@spark.apache.org by an...@apache.org on 2016/03/04 19:32:05 UTC
[1/3] spark git commit: [SPARK-13633][SQL] Move things into catalyst.parser package
Repository: spark
Updated Branches:
refs/heads/master 83302c3bf -> b7d414742
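For orientation: the hunks in this message amount to one package move. Parser-related classes (CatalystQl, ParserInterface, DataTypeParser, LegacyTypeStringParser) leave org.apache.spark.sql.catalyst and org.apache.spark.sql.catalyst.util for the new org.apache.spark.sql.catalyst.parser package, and callers' imports are updated to match. A minimal before/after sketch of the import change, assembled from the import hunks below:

    // Before this commit: parser classes were split across two packages.
    import org.apache.spark.sql.catalyst.{CatalystQl, ParserInterface}
    import org.apache.spark.sql.catalyst.util.{DataTypeParser, LegacyTypeStringParser}

    // After this commit: everything parser-related lives in catalyst.parser.
    import org.apache.spark.sql.catalyst.parser.{CatalystQl, DataTypeParser,
      LegacyTypeStringParser, ParserInterface}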
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DataTypeParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DataTypeParserSuite.scala
deleted file mode 100644
index bebf708..0000000
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DataTypeParserSuite.scala
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.spark.sql.catalyst.util
-
-import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.types._
-
-class DataTypeParserSuite extends SparkFunSuite {
-
- def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
- test(s"parse ${dataTypeString.replace("\n", "")}") {
- assert(DataTypeParser.parse(dataTypeString) === expectedDataType)
- }
- }
-
- def unsupported(dataTypeString: String): Unit = {
- test(s"$dataTypeString is not supported") {
- intercept[DataTypeException](DataTypeParser.parse(dataTypeString))
- }
- }
-
- checkDataType("int", IntegerType)
- checkDataType("integer", IntegerType)
- checkDataType("BooLean", BooleanType)
- checkDataType("tinYint", ByteType)
- checkDataType("smallINT", ShortType)
- checkDataType("INT", IntegerType)
- checkDataType("INTEGER", IntegerType)
- checkDataType("bigint", LongType)
- checkDataType("float", FloatType)
- checkDataType("dOUBle", DoubleType)
- checkDataType("decimal(10, 5)", DecimalType(10, 5))
- checkDataType("decimal", DecimalType.USER_DEFAULT)
- checkDataType("DATE", DateType)
- checkDataType("timestamp", TimestampType)
- checkDataType("string", StringType)
- checkDataType("ChaR(5)", StringType)
- checkDataType("varchAr(20)", StringType)
- checkDataType("cHaR(27)", StringType)
- checkDataType("BINARY", BinaryType)
-
- checkDataType("array<doublE>", ArrayType(DoubleType, true))
- checkDataType("Array<map<int, tinYint>>", ArrayType(MapType(IntegerType, ByteType, true), true))
- checkDataType(
- "array<struct<tinYint:tinyint>>",
- ArrayType(StructType(StructField("tinYint", ByteType, true) :: Nil), true)
- )
- checkDataType("MAP<int, STRING>", MapType(IntegerType, StringType, true))
- checkDataType("MAp<int, ARRAY<double>>", MapType(IntegerType, ArrayType(DoubleType), true))
- checkDataType(
- "MAP<int, struct<varchar:string>>",
- MapType(IntegerType, StructType(StructField("varchar", StringType, true) :: Nil), true)
- )
-
- checkDataType(
- "struct<intType: int, ts:timestamp>",
- StructType(
- StructField("intType", IntegerType, true) ::
- StructField("ts", TimestampType, true) :: Nil)
- )
- // It is fine to use the data type string as the column name.
- checkDataType(
- "Struct<int: int, timestamp:timestamp>",
- StructType(
- StructField("int", IntegerType, true) ::
- StructField("timestamp", TimestampType, true) :: Nil)
- )
- checkDataType(
- """
- |struct<
- | struct:struct<deciMal:DECimal, anotherDecimal:decimAL(5,2)>,
- | MAP:Map<timestamp, varchar(10)>,
- | arrAy:Array<double>,
- | anotherArray:Array<char(9)>>
- """.stripMargin,
- StructType(
- StructField("struct",
- StructType(
- StructField("deciMal", DecimalType.USER_DEFAULT, true) ::
- StructField("anotherDecimal", DecimalType(5, 2), true) :: Nil), true) ::
- StructField("MAP", MapType(TimestampType, StringType), true) ::
- StructField("arrAy", ArrayType(DoubleType, true), true) ::
- StructField("anotherArray", ArrayType(StringType, true), true) :: Nil)
- )
- // A column name can be a reserved word in our DDL parser and SqlParser.
- checkDataType(
- "Struct<TABLE: string, CASE:boolean>",
- StructType(
- StructField("TABLE", StringType, true) ::
- StructField("CASE", BooleanType, true) :: Nil)
- )
- // Use backticks to quote column names having special characters.
- checkDataType(
- "struct<`x+y`:int, `!@#$%^&*()`:string, `1_2.345<>:\"`:varchar(20)>",
- StructType(
- StructField("x+y", IntegerType, true) ::
- StructField("!@#$%^&*()", StringType, true) ::
- StructField("1_2.345<>:\"", StringType, true) :: Nil)
- )
- // Empty struct.
- checkDataType("strUCt<>", StructType(Nil))
-
- unsupported("it is not a data type")
- unsupported("struct<x+y: int, 1.1:timestamp>")
- unsupported("struct<x: int")
- unsupported("struct<x int, y string>")
- unsupported("struct<`x``y` int>")
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 6c7929c..0fa8159 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -24,7 +24,8 @@ import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.util.{usePrettyExpression, DataTypeParser}
+import org.apache.spark.sql.catalyst.parser.DataTypeParser
+import org.apache.spark.sql.catalyst.util.usePrettyExpression
import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.types._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 39dad16..c742bf2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -35,6 +35,7 @@ import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.encoders.encoderFor
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.optimizer.Optimizer
+import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala
index bc690f6..9143258 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala
@@ -17,9 +17,9 @@
package org.apache.spark.sql.execution
import org.apache.spark.sql.{AnalysisException, SaveMode}
-import org.apache.spark.sql.catalyst.{CatalystQl, TableIdentifier}
+import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
-import org.apache.spark.sql.catalyst.parser.{ASTNode, ParserConf, SimpleParserConf}
+import org.apache.spark.sql.catalyst.parser.{ASTNode, CatalystQl, ParserConf, SimpleParserConf}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
index 7ea098c..b8af832 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
@@ -45,7 +45,7 @@ import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.{RDD, SqlNewHadoopPartition, SqlNewHadoopRDD}
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.util.LegacyTypeStringParser
+import org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser
import org.apache.spark.sql.execution.datasources.{PartitionSpec, _}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index b9873d3..86412c3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -22,11 +22,12 @@ import scala.reflect.runtime.universe.{typeTag, TypeTag}
import scala.util.Try
import org.apache.spark.annotation.Experimental
-import org.apache.spark.sql.catalyst.{CatalystQl, ScalaReflection}
+import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.{Star, UnresolvedFunction}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
+import org.apache.spark.sql.catalyst.parser.CatalystQl
import org.apache.spark.sql.catalyst.plans.logical.BroadcastHint
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.types._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index f93a405..f5f3654 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -18,9 +18,9 @@
package org.apache.spark.sql.internal
import org.apache.spark.sql.{ContinuousQueryManager, SQLContext, UDFRegistration}
-import org.apache.spark.sql.catalyst.ParserInterface
import org.apache.spark.sql.catalyst.analysis.{Analyzer, Catalog, FunctionRegistry, SimpleCatalog}
import org.apache.spark.sql.catalyst.optimizer.Optimizer
+import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution._
import org.apache.spark.sql.execution.datasources.{PreInsertCastAndRename, ResolveDataSource}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index ee8ec2d..a053108 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -36,10 +36,10 @@ import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{Catalog, MultiInstanceRelation, OverrideCatalog}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.parser.DataTypeParser
import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
-import org.apache.spark.sql.catalyst.util.DataTypeParser
import org.apache.spark.sql.execution.{datasources, FileRelation}
import org.apache.spark.sql.execution.datasources.{Partition => ParquetPartition, _}
import org.apache.spark.sql.execution.datasources.parquet.ParquetRelation
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 0d4b79f..8207e78 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -18,8 +18,8 @@
package org.apache.spark.sql.hive
import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.ParserInterface
import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, OverrideCatalog}
+import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.execution.{python, SparkPlanner}
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.internal.{SessionState, SQLConf}
[2/3] spark git commit: [SPARK-13633][SQL] Move things into catalyst.parser package
Posted by an...@apache.org.
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/CatalystQl.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/CatalystQl.scala
new file mode 100644
index 0000000..d231841
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/CatalystQl.scala
@@ -0,0 +1,1012 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import java.sql.Date
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.Count
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.trees.CurrentOrigin
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.CalendarInterval
+import org.apache.spark.util.random.RandomSampler
+
+
+/**
+ * This class translates SQL to Catalyst [[LogicalPlan]]s or [[Expression]]s.
+ */
+private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends ParserInterface {
+ object Token {
+ def unapply(node: ASTNode): Some[(String, List[ASTNode])] = {
+ CurrentOrigin.setPosition(node.line, node.positionInLine)
+ node.pattern
+ }
+ }
+
+ /**
+ * The safeParse method allows a user to focus on the parsing/AST transformation logic. This
+ * method will take care of possible errors during the parsing process.
+ */
+ protected def safeParse[T](sql: String, ast: ASTNode)(toResult: ASTNode => T): T = {
+ try {
+ toResult(ast)
+ } catch {
+ case e: MatchError => throw e
+ case e: AnalysisException => throw e
+ case e: Exception =>
+ throw new AnalysisException(e.getMessage)
+ case e: NotImplementedError =>
+ throw new AnalysisException(
+ s"""Unsupported language features in query
+ |== SQL ==
+ |$sql
+ |== AST ==
+ |${ast.treeString}
+ |== Error ==
+ |$e
+ |== Stacktrace ==
+ |${e.getStackTrace.head}
+ """.stripMargin)
+ }
+ }
+
+ /** Creates LogicalPlan for a given SQL string. */
+ def parsePlan(sql: String): LogicalPlan =
+ safeParse(sql, ParseDriver.parsePlan(sql, conf))(nodeToPlan)
+
+ /** Creates Expression for a given SQL string. */
+ def parseExpression(sql: String): Expression =
+ safeParse(sql, ParseDriver.parseExpression(sql, conf))(selExprNodeToExpr(_).get)
+
+ /** Creates TableIdentifier for a given SQL string. */
+ def parseTableIdentifier(sql: String): TableIdentifier =
+ safeParse(sql, ParseDriver.parseTableName(sql, conf))(extractTableIdent)
+
+ def parseDdl(sql: String): Seq[Attribute] = {
+ safeParse(sql, ParseDriver.parseExpression(sql, conf)) { ast =>
+ val Token("TOK_CREATETABLE", children) = ast
+ children
+ .find(_.text == "TOK_TABCOLLIST")
+ .getOrElse(sys.error("No columnList!"))
+ .flatMap(_.children.map(nodeToAttribute))
+ }
+ }
+
+ protected def getClauses(
+ clauseNames: Seq[String],
+ nodeList: Seq[ASTNode]): Seq[Option[ASTNode]] = {
+ var remainingNodes = nodeList
+ val clauses = clauseNames.map { clauseName =>
+ val (matches, nonMatches) = remainingNodes.partition(_.text.toUpperCase == clauseName)
+ remainingNodes = nonMatches ++ (if (matches.nonEmpty) matches.tail else Nil)
+ matches.headOption
+ }
+
+ if (remainingNodes.nonEmpty) {
+ sys.error(
+ s"""Unhandled clauses: ${remainingNodes.map(_.treeString).mkString("\n")}.
+ |You are likely trying to use an unsupported Hive feature.""".stripMargin)
+ }
+ clauses
+ }
+
+ protected def getClause(clauseName: String, nodeList: Seq[ASTNode]): ASTNode =
+ getClauseOption(clauseName, nodeList).getOrElse(sys.error(
+ s"Expected clause $clauseName missing from ${nodeList.map(_.treeString).mkString("\n")}"))
+
+ protected def getClauseOption(clauseName: String, nodeList: Seq[ASTNode]): Option[ASTNode] = {
+ nodeList.filter { case ast: ASTNode => ast.text == clauseName } match {
+ case Seq(oneMatch) => Some(oneMatch)
+ case Seq() => None
+ case _ => sys.error(s"Found multiple instances of clause $clauseName")
+ }
+ }
+
+ protected def nodeToAttribute(node: ASTNode): Attribute = node match {
+ case Token("TOK_TABCOL", Token(colName, Nil) :: dataType :: Nil) =>
+ AttributeReference(colName, nodeToDataType(dataType), nullable = true)()
+ case _ =>
+ noParseRule("Attribute", node)
+ }
+
+ protected def nodeToDataType(node: ASTNode): DataType = node match {
+ case Token("TOK_DECIMAL", precision :: scale :: Nil) =>
+ DecimalType(precision.text.toInt, scale.text.toInt)
+ case Token("TOK_DECIMAL", precision :: Nil) =>
+ DecimalType(precision.text.toInt, 0)
+ case Token("TOK_DECIMAL", Nil) => DecimalType.USER_DEFAULT
+ case Token("TOK_BIGINT", Nil) => LongType
+ case Token("TOK_INT", Nil) => IntegerType
+ case Token("TOK_TINYINT", Nil) => ByteType
+ case Token("TOK_SMALLINT", Nil) => ShortType
+ case Token("TOK_BOOLEAN", Nil) => BooleanType
+ case Token("TOK_STRING", Nil) => StringType
+ case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
+ case Token("TOK_CHAR", Token(_, Nil) :: Nil) => StringType
+ case Token("TOK_FLOAT", Nil) => FloatType
+ case Token("TOK_DOUBLE", Nil) => DoubleType
+ case Token("TOK_DATE", Nil) => DateType
+ case Token("TOK_TIMESTAMP", Nil) => TimestampType
+ case Token("TOK_BINARY", Nil) => BinaryType
+ case Token("TOK_LIST", elementType :: Nil) => ArrayType(nodeToDataType(elementType))
+ case Token("TOK_STRUCT", Token("TOK_TABCOLLIST", fields) :: Nil) =>
+ StructType(fields.map(nodeToStructField))
+ case Token("TOK_MAP", keyType :: valueType :: Nil) =>
+ MapType(nodeToDataType(keyType), nodeToDataType(valueType))
+ case _ =>
+ noParseRule("DataType", node)
+ }
+
+ protected def nodeToStructField(node: ASTNode): StructField = node match {
+ case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) =>
+ StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true)
+ case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: comment :: Nil) =>
+ val meta = new MetadataBuilder().putString("comment", unquoteString(comment.text)).build()
+ StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true, meta)
+ case _ =>
+ noParseRule("StructField", node)
+ }
+
+ protected def extractTableIdent(tableNameParts: ASTNode): TableIdentifier = {
+ tableNameParts.children.map {
+ case Token(part, Nil) => cleanIdentifier(part)
+ } match {
+ case Seq(tableOnly) => TableIdentifier(tableOnly)
+ case Seq(databaseName, table) => TableIdentifier(table, Some(databaseName))
+ case other => sys.error("Hive only supports tables names like 'tableName' " +
+ s"or 'databaseName.tableName', found '$other'")
+ }
+ }
+
+ /**
+ * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2))
+ * is equivalent to
+ * SELECT MAX(value) FROM src GROUP BY k1, k2 UNION SELECT MAX(value) FROM src GROUP BY k2
+ * Check the following link for details.
+ *
+https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C+Grouping+and+Rollup
+ *
+ * The bitmask denotes the validity of the grouping expressions for a grouping set; the
+ * bitmask is also called the grouping id (`GROUPING__ID`, the virtual column in Hive).
+ * e.g. in the superset (k1, k2, k3) (bit 2: k1, bit 1: k2, and bit 0: k3), the grouping ids
+ * of GROUPING SETS (k1, k2) and (k2) should be 1 and 5 respectively.
+ */
+ protected def extractGroupingSet(children: Seq[ASTNode]): (Seq[Expression], Seq[Int]) = {
+ val (keyASTs, setASTs) = children.partition {
+ case Token("TOK_GROUPING_SETS_EXPRESSION", _) => false // grouping sets
+ case _ => true // grouping keys
+ }
+
+ val keys = keyASTs.map(nodeToExpr)
+ val keyMap = keyASTs.zipWithIndex.toMap
+
+ val mask = (1 << keys.length) - 1
+ val bitmasks: Seq[Int] = setASTs.map {
+ case Token("TOK_GROUPING_SETS_EXPRESSION", columns) =>
+ columns.foldLeft(mask)((bitmap, col) => {
+ val keyIndex = keyMap.find(_._1.treeEquals(col)).map(_._2).getOrElse(
+ throw new AnalysisException(s"${col.treeString} doesn't show up in the GROUP BY list"))
+ // 0 means that the column at the given index is a grouping column, 1 means it is not,
+ // so we unset the bit in bitmap.
+ bitmap & ~(1 << (keys.length - 1 - keyIndex))
+ })
+ case _ => sys.error("Expect GROUPING SETS clause")
+ }
+
+ (keys, bitmasks)
+ }
+
+ protected def nodeToPlan(node: ASTNode): LogicalPlan = node match {
+ case Token("TOK_SHOWFUNCTIONS", args) =>
+ // Skip LIKE.
+ val pattern = args match {
+ case like :: nodes if like.text.toUpperCase == "LIKE" => nodes
+ case nodes => nodes
+ }
+
+ // Extract Database and Function name
+ pattern match {
+ case Nil =>
+ ShowFunctions(None, None)
+ case Token(name, Nil) :: Nil =>
+ ShowFunctions(None, Some(unquoteString(cleanIdentifier(name))))
+ case Token(db, Nil) :: Token(name, Nil) :: Nil =>
+ ShowFunctions(Some(unquoteString(cleanIdentifier(db))),
+ Some(unquoteString(cleanIdentifier(name))))
+ case _ =>
+ noParseRule("SHOW FUNCTIONS", node)
+ }
+
+ case Token("TOK_DESCFUNCTION", Token(functionName, Nil) :: isExtended) =>
+ DescribeFunction(cleanIdentifier(functionName), isExtended.nonEmpty)
+
+ case Token("TOK_QUERY", queryArgs @ Token("TOK_CTE" | "TOK_FROM" | "TOK_INSERT", _) :: _) =>
+ val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
+ queryArgs match {
+ case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: inserts =>
+ val cteRelations = ctes.map { node =>
+ val relation = nodeToRelation(node).asInstanceOf[SubqueryAlias]
+ relation.alias -> relation
+ }
+ (Some(from.head), inserts, Some(cteRelations.toMap))
+ case Token("TOK_FROM", from) :: inserts =>
+ (Some(from.head), inserts, None)
+ case Token("TOK_INSERT", _) :: Nil =>
+ (None, queryArgs, None)
+ }
+
+ // Return one query for each insert clause.
+ val queries = insertClauses.map {
+ case Token("TOK_INSERT", singleInsert) =>
+ val (
+ intoClause ::
+ destClause ::
+ selectClause ::
+ selectDistinctClause ::
+ whereClause ::
+ groupByClause ::
+ rollupGroupByClause ::
+ cubeGroupByClause ::
+ groupingSetsClause ::
+ orderByClause ::
+ havingClause ::
+ sortByClause ::
+ clusterByClause ::
+ distributeByClause ::
+ limitClause ::
+ lateralViewClause ::
+ windowClause :: Nil) = {
+ getClauses(
+ Seq(
+ "TOK_INSERT_INTO",
+ "TOK_DESTINATION",
+ "TOK_SELECT",
+ "TOK_SELECTDI",
+ "TOK_WHERE",
+ "TOK_GROUPBY",
+ "TOK_ROLLUP_GROUPBY",
+ "TOK_CUBE_GROUPBY",
+ "TOK_GROUPING_SETS",
+ "TOK_ORDERBY",
+ "TOK_HAVING",
+ "TOK_SORTBY",
+ "TOK_CLUSTERBY",
+ "TOK_DISTRIBUTEBY",
+ "TOK_LIMIT",
+ "TOK_LATERAL_VIEW",
+ "WINDOW"),
+ singleInsert)
+ }
+
+ val relations = fromClause match {
+ case Some(f) => nodeToRelation(f)
+ case None => OneRowRelation
+ }
+
+ val withWhere = whereClause.map { whereNode =>
+ val Seq(whereExpr) = whereNode.children
+ Filter(nodeToExpr(whereExpr), relations)
+ }.getOrElse(relations)
+
+ val select = (selectClause orElse selectDistinctClause)
+ .getOrElse(sys.error("No select clause."))
+
+ val transformation = nodeToTransformation(select.children.head, withWhere)
+
+ val withLateralView = lateralViewClause.map { lv =>
+ nodeToGenerate(lv.children.head, outer = false, withWhere)
+ }.getOrElse(withWhere)
+
+ // The projection of the query can either be a normal projection, an aggregation
+ // (if there is a group by) or a script transformation.
+ val withProject: LogicalPlan = transformation.getOrElse {
+ val selectExpressions =
+ select.children.flatMap(selExprNodeToExpr).map(UnresolvedAlias(_))
+ Seq(
+ groupByClause.map(e => e match {
+ case Token("TOK_GROUPBY", children) =>
+ // Not a transformation so must be either project or aggregation.
+ Aggregate(children.map(nodeToExpr), selectExpressions, withLateralView)
+ case _ => sys.error("Expect GROUP BY")
+ }),
+ groupingSetsClause.map(e => e match {
+ case Token("TOK_GROUPING_SETS", children) =>
+ val (groupByExprs, masks) = extractGroupingSet(children)
+ GroupingSets(masks, groupByExprs, withLateralView, selectExpressions)
+ case _ => sys.error("Expect GROUPING SETS")
+ }),
+ rollupGroupByClause.map(e => e match {
+ case Token("TOK_ROLLUP_GROUPBY", children) =>
+ Aggregate(
+ Seq(Rollup(children.map(nodeToExpr))),
+ selectExpressions,
+ withLateralView)
+ case _ => sys.error("Expect WITH ROLLUP")
+ }),
+ cubeGroupByClause.map(e => e match {
+ case Token("TOK_CUBE_GROUPBY", children) =>
+ Aggregate(
+ Seq(Cube(children.map(nodeToExpr))),
+ selectExpressions,
+ withLateralView)
+ case _ => sys.error("Expect WITH CUBE")
+ }),
+ Some(Project(selectExpressions, withLateralView))).flatten.head
+ }
+
+ // Handle HAVING clause.
+ val withHaving = havingClause.map { h =>
+ val havingExpr = h.children match { case Seq(hexpr) => nodeToExpr(hexpr) }
+ // Note that we added a cast to boolean. If the expression itself is already boolean,
+ // the optimizer will get rid of the unnecessary cast.
+ Filter(Cast(havingExpr, BooleanType), withProject)
+ }.getOrElse(withProject)
+
+ // Handle SELECT DISTINCT
+ val withDistinct =
+ if (selectDistinctClause.isDefined) Distinct(withHaving) else withHaving
+
+ // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause.
+ val withSort =
+ (orderByClause, sortByClause, distributeByClause, clusterByClause) match {
+ case (Some(totalOrdering), None, None, None) =>
+ Sort(totalOrdering.children.map(nodeToSortOrder), global = true, withDistinct)
+ case (None, Some(perPartitionOrdering), None, None) =>
+ Sort(
+ perPartitionOrdering.children.map(nodeToSortOrder),
+ global = false, withDistinct)
+ case (None, None, Some(partitionExprs), None) =>
+ RepartitionByExpression(
+ partitionExprs.children.map(nodeToExpr), withDistinct)
+ case (None, Some(perPartitionOrdering), Some(partitionExprs), None) =>
+ Sort(
+ perPartitionOrdering.children.map(nodeToSortOrder), global = false,
+ RepartitionByExpression(
+ partitionExprs.children.map(nodeToExpr),
+ withDistinct))
+ case (None, None, None, Some(clusterExprs)) =>
+ Sort(
+ clusterExprs.children.map(nodeToExpr).map(SortOrder(_, Ascending)),
+ global = false,
+ RepartitionByExpression(
+ clusterExprs.children.map(nodeToExpr),
+ withDistinct))
+ case (None, None, None, None) => withDistinct
+ case _ => sys.error("Unsupported set of ordering / distribution clauses.")
+ }
+
+ val withLimit =
+ limitClause.map(l => nodeToExpr(l.children.head))
+ .map(Limit(_, withSort))
+ .getOrElse(withSort)
+
+ // Collect all window specifications defined in the WINDOW clause.
+ val windowDefinitions = windowClause.map(_.children.collect {
+ case Token("TOK_WINDOWDEF",
+ Token(windowName, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) =>
+ windowName -> nodesToWindowSpecification(spec)
+ }.toMap)
+ // Handle cases like
+ // window w1 as (partition by p_mfgr order by p_name
+ // range between 2 preceding and 2 following),
+ // w2 as w1
+ val resolvedCrossReference = windowDefinitions.map {
+ windowDefMap => windowDefMap.map {
+ case (windowName, WindowSpecReference(other)) =>
+ (windowName, windowDefMap(other).asInstanceOf[WindowSpecDefinition])
+ case o => o.asInstanceOf[(String, WindowSpecDefinition)]
+ }
+ }
+
+ val withWindowDefinitions =
+ resolvedCrossReference.map(WithWindowDefinition(_, withLimit)).getOrElse(withLimit)
+
+ // TOK_INSERT_INTO means to add files to the table.
+ // TOK_DESTINATION means to overwrite the table.
+ val resultDestination =
+ (intoClause orElse destClause).getOrElse(sys.error("No destination found."))
+ val overwrite = intoClause.isEmpty
+ nodeToDest(
+ resultDestination,
+ withWindowDefinitions,
+ overwrite)
+ }
+
+ // If there are multiple INSERTS just UNION them together into one query.
+ val query = if (queries.length == 1) queries.head else Union(queries)
+
+ // Return a With plan if there is a CTE.
+ cteRelations.map(With(query, _)).getOrElse(query)
+
+ case Token("TOK_UNIONALL", left :: right :: Nil) =>
+ Union(nodeToPlan(left), nodeToPlan(right))
+ case Token("TOK_UNIONDISTINCT", left :: right :: Nil) =>
+ Distinct(Union(nodeToPlan(left), nodeToPlan(right)))
+ case Token("TOK_EXCEPT", left :: right :: Nil) =>
+ Except(nodeToPlan(left), nodeToPlan(right))
+ case Token("TOK_INTERSECT", left :: right :: Nil) =>
+ Intersect(nodeToPlan(left), nodeToPlan(right))
+
+ case _ =>
+ noParseRule("Plan", node)
+ }
+
+ val allJoinTokens = "(TOK_.*JOIN)".r
+ val laterViewToken = "TOK_LATERAL_VIEW(.*)".r
+ protected def nodeToRelation(node: ASTNode): LogicalPlan = {
+ node match {
+ case Token("TOK_SUBQUERY", query :: Token(alias, Nil) :: Nil) =>
+ SubqueryAlias(cleanIdentifier(alias), nodeToPlan(query))
+
+ case Token(laterViewToken(isOuter), selectClause :: relationClause :: Nil) =>
+ nodeToGenerate(
+ selectClause,
+ outer = isOuter.nonEmpty,
+ nodeToRelation(relationClause))
+
+ /* All relations, possibly with aliases or sampling clauses. */
+ case Token("TOK_TABREF", clauses) =>
+ // If the last clause is not a token then it's the alias of the table.
+ val (nonAliasClauses, aliasClause) =
+ if (clauses.last.text.startsWith("TOK")) {
+ (clauses, None)
+ } else {
+ (clauses.dropRight(1), Some(clauses.last))
+ }
+
+ val (Some(tableNameParts) ::
+ splitSampleClause ::
+ bucketSampleClause :: Nil) = {
+ getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE", "TOK_TABLEBUCKETSAMPLE"),
+ nonAliasClauses)
+ }
+
+ val tableIdent = extractTableIdent(tableNameParts)
+ val alias = aliasClause.map { case Token(a, Nil) => cleanIdentifier(a) }
+ val relation = UnresolvedRelation(tableIdent, alias)
+
+ // Apply sampling if requested.
+ (bucketSampleClause orElse splitSampleClause).map {
+ case Token("TOK_TABLESPLITSAMPLE",
+ Token("TOK_ROWCOUNT", Nil) :: Token(count, Nil) :: Nil) =>
+ Limit(Literal(count.toInt), relation)
+ case Token("TOK_TABLESPLITSAMPLE",
+ Token("TOK_PERCENT", Nil) :: Token(fraction, Nil) :: Nil) =>
+ // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
+ // function takes X PERCENT as the input and the range of X is [0, 100], we need to
+ // adjust the fraction.
+ require(
+ fraction.toDouble >= (0.0 - RandomSampler.roundingEpsilon)
+ && fraction.toDouble <= (100.0 + RandomSampler.roundingEpsilon),
+ s"Sampling fraction ($fraction) must be on interval [0, 100]")
+ Sample(0.0, fraction.toDouble / 100, withReplacement = false,
+ (math.random * 1000).toInt,
+ relation)(
+ isTableSample = true)
+ case Token("TOK_TABLEBUCKETSAMPLE",
+ Token(numerator, Nil) ::
+ Token(denominator, Nil) :: Nil) =>
+ val fraction = numerator.toDouble / denominator.toDouble
+ Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, relation)(
+ isTableSample = true)
+ case a =>
+ noParseRule("Sampling", a)
+ }.getOrElse(relation)
+
+ case Token(allJoinTokens(joinToken), relation1 :: relation2 :: other) =>
+ if (other.size > 1) {
+ sys.error(s"Unsupported join operation: $other")
+ }
+
+ val joinType = joinToken match {
+ case "TOK_JOIN" => Inner
+ case "TOK_CROSSJOIN" => Inner
+ case "TOK_RIGHTOUTERJOIN" => RightOuter
+ case "TOK_LEFTOUTERJOIN" => LeftOuter
+ case "TOK_FULLOUTERJOIN" => FullOuter
+ case "TOK_LEFTSEMIJOIN" => LeftSemi
+ case "TOK_UNIQUEJOIN" => noParseRule("Unique Join", node)
+ case "TOK_ANTIJOIN" => noParseRule("Anti Join", node)
+ case "TOK_NATURALJOIN" => NaturalJoin(Inner)
+ case "TOK_NATURALRIGHTOUTERJOIN" => NaturalJoin(RightOuter)
+ case "TOK_NATURALLEFTOUTERJOIN" => NaturalJoin(LeftOuter)
+ case "TOK_NATURALFULLOUTERJOIN" => NaturalJoin(FullOuter)
+ }
+ Join(nodeToRelation(relation1),
+ nodeToRelation(relation2),
+ joinType,
+ other.headOption.map(nodeToExpr))
+
+ case _ =>
+ noParseRule("Relation", node)
+ }
+ }
+
+ protected def nodeToSortOrder(node: ASTNode): SortOrder = node match {
+ case Token("TOK_TABSORTCOLNAMEASC", sortExpr :: Nil) =>
+ SortOrder(nodeToExpr(sortExpr), Ascending)
+ case Token("TOK_TABSORTCOLNAMEDESC", sortExpr :: Nil) =>
+ SortOrder(nodeToExpr(sortExpr), Descending)
+ case _ =>
+ noParseRule("SortOrder", node)
+ }
+
+ val destinationToken = "TOK_DESTINATION|TOK_INSERT_INTO".r
+ protected def nodeToDest(
+ node: ASTNode,
+ query: LogicalPlan,
+ overwrite: Boolean): LogicalPlan = node match {
+ case Token(destinationToken(),
+ Token("TOK_DIR",
+ Token("TOK_TMP_FILE", Nil) :: Nil) :: Nil) =>
+ query
+
+ case Token(destinationToken(),
+ Token("TOK_TAB",
+ tableArgs) :: Nil) =>
+ val Some(tableNameParts) :: partitionClause :: Nil =
+ getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
+
+ val tableIdent = extractTableIdent(tableNameParts)
+
+ val partitionKeys = partitionClause.map(_.children.map {
+ // Parse partitions. We also make keys case insensitive.
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
+ cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
+ cleanIdentifier(key.toLowerCase) -> None
+ }.toMap).getOrElse(Map.empty)
+
+ InsertIntoTable(
+ UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = false)
+
+ case Token(destinationToken(),
+ Token("TOK_TAB",
+ tableArgs) ::
+ Token("TOK_IFNOTEXISTS",
+ ifNotExists) :: Nil) =>
+ val Some(tableNameParts) :: partitionClause :: Nil =
+ getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
+
+ val tableIdent = extractTableIdent(tableNameParts)
+
+ val partitionKeys = partitionClause.map(_.children.map {
+ // Parse partitions. We also make keys case insensitive.
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
+ cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
+ cleanIdentifier(key.toLowerCase) -> None
+ }.toMap).getOrElse(Map.empty)
+
+ InsertIntoTable(
+ UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = true)
+
+ case _ =>
+ noParseRule("Destination", node)
+ }
+
+ protected def selExprNodeToExpr(node: ASTNode): Option[Expression] = node match {
+ case Token("TOK_SELEXPR", e :: Nil) =>
+ Some(nodeToExpr(e))
+
+ case Token("TOK_SELEXPR", e :: Token(alias, Nil) :: Nil) =>
+ Some(Alias(nodeToExpr(e), cleanIdentifier(alias))())
+
+ case Token("TOK_SELEXPR", e :: aliasChildren) =>
+ val aliasNames = aliasChildren.collect {
+ case Token(name, Nil) => cleanIdentifier(name)
+ }
+ Some(MultiAlias(nodeToExpr(e), aliasNames))
+
+ /* Hints are ignored */
+ case Token("TOK_HINTLIST", _) => None
+
+ case _ =>
+ noParseRule("Select", node)
+ }
+
+ protected val escapedIdentifier = "`(.+)`".r
+ protected val doubleQuotedString = "\"([^\"]+)\"".r
+ protected val singleQuotedString = "'([^']+)'".r
+
+ protected def unquoteString(str: String) = str match {
+ case singleQuotedString(s) => s
+ case doubleQuotedString(s) => s
+ case other => other
+ }
+
+ /** Strips backticks from ident if present */
+ protected def cleanIdentifier(ident: String): String = ident match {
+ case escapedIdentifier(i) => i
+ case plainIdent => plainIdent
+ }
+
+ /* Case insensitive matches */
+ val COUNT = "(?i)COUNT".r
+ val SUM = "(?i)SUM".r
+ val AND = "(?i)AND".r
+ val OR = "(?i)OR".r
+ val NOT = "(?i)NOT".r
+ val TRUE = "(?i)TRUE".r
+ val FALSE = "(?i)FALSE".r
+ val LIKE = "(?i)LIKE".r
+ val RLIKE = "(?i)RLIKE".r
+ val REGEXP = "(?i)REGEXP".r
+ val IN = "(?i)IN".r
+ val DIV = "(?i)DIV".r
+ val BETWEEN = "(?i)BETWEEN".r
+ val WHEN = "(?i)WHEN".r
+ val CASE = "(?i)CASE".r
+
+ val INTEGRAL = "[+-]?\\d+".r
+ val DECIMAL = "[+-]?((\\d+(\\.\\d*)?)|(\\.\\d+))".r
+
+ protected def nodeToExpr(node: ASTNode): Expression = node match {
+ /* Attribute References */
+ case Token("TOK_TABLE_OR_COL", Token(name, Nil) :: Nil) =>
+ UnresolvedAttribute.quoted(cleanIdentifier(name))
+ case Token(".", qualifier :: Token(attr, Nil) :: Nil) =>
+ nodeToExpr(qualifier) match {
+ case UnresolvedAttribute(nameParts) =>
+ UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
+ case other => UnresolvedExtractValue(other, Literal(cleanIdentifier(attr)))
+ }
+ case Token("TOK_SUBQUERY_EXPR", Token("TOK_SUBQUERY_OP", Nil) :: subquery :: Nil) =>
+ ScalarSubquery(nodeToPlan(subquery))
+
+ /* Stars (*) */
+ case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
+ // The format of dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
+ // have a single child, which is tableName.
+ case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", target) :: Nil) if target.nonEmpty =>
+ UnresolvedStar(Some(target.map(x => cleanIdentifier(x.text))))
+
+ /* Aggregate Functions */
+ case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
+ Count(args.map(nodeToExpr)).toAggregateExpression(isDistinct = true)
+ case Token("TOK_FUNCTIONSTAR", Token(COUNT(), Nil) :: Nil) =>
+ Count(Literal(1)).toAggregateExpression()
+
+ /* Casts */
+ case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), StringType)
+ case Token("TOK_FUNCTION", Token("TOK_VARCHAR", _) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), StringType)
+ case Token("TOK_FUNCTION", Token("TOK_CHAR", _) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), StringType)
+ case Token("TOK_FUNCTION", Token("TOK_INT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), IntegerType)
+ case Token("TOK_FUNCTION", Token("TOK_BIGINT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), LongType)
+ case Token("TOK_FUNCTION", Token("TOK_FLOAT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), FloatType)
+ case Token("TOK_FUNCTION", Token("TOK_DOUBLE", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DoubleType)
+ case Token("TOK_FUNCTION", Token("TOK_SMALLINT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), ShortType)
+ case Token("TOK_FUNCTION", Token("TOK_TINYINT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), ByteType)
+ case Token("TOK_FUNCTION", Token("TOK_BINARY", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), BinaryType)
+ case Token("TOK_FUNCTION", Token("TOK_BOOLEAN", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), BooleanType)
+ case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: scale :: nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, scale.text.toInt))
+ case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, 0))
+ case Token("TOK_FUNCTION", Token("TOK_DECIMAL", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DecimalType.USER_DEFAULT)
+ case Token("TOK_FUNCTION", Token("TOK_TIMESTAMP", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), TimestampType)
+ case Token("TOK_FUNCTION", Token("TOK_DATE", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DateType)
+
+ /* Arithmetic */
+ case Token("+", child :: Nil) => nodeToExpr(child)
+ case Token("-", child :: Nil) => UnaryMinus(nodeToExpr(child))
+ case Token("~", child :: Nil) => BitwiseNot(nodeToExpr(child))
+ case Token("+", left :: right:: Nil) => Add(nodeToExpr(left), nodeToExpr(right))
+ case Token("-", left :: right:: Nil) => Subtract(nodeToExpr(left), nodeToExpr(right))
+ case Token("*", left :: right:: Nil) => Multiply(nodeToExpr(left), nodeToExpr(right))
+ case Token("/", left :: right:: Nil) => Divide(nodeToExpr(left), nodeToExpr(right))
+ case Token(DIV(), left :: right:: Nil) =>
+ Cast(Divide(nodeToExpr(left), nodeToExpr(right)), LongType)
+ case Token("%", left :: right:: Nil) => Remainder(nodeToExpr(left), nodeToExpr(right))
+ case Token("&", left :: right:: Nil) => BitwiseAnd(nodeToExpr(left), nodeToExpr(right))
+ case Token("|", left :: right:: Nil) => BitwiseOr(nodeToExpr(left), nodeToExpr(right))
+ case Token("^", left :: right:: Nil) => BitwiseXor(nodeToExpr(left), nodeToExpr(right))
+
+ /* Comparisons */
+ case Token("=", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
+ case Token("==", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
+ case Token("<=>", left :: right:: Nil) => EqualNullSafe(nodeToExpr(left), nodeToExpr(right))
+ case Token("!=", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
+ case Token("<>", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
+ case Token(">", left :: right:: Nil) => GreaterThan(nodeToExpr(left), nodeToExpr(right))
+ case Token(">=", left :: right:: Nil) => GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right))
+ case Token("<", left :: right:: Nil) => LessThan(nodeToExpr(left), nodeToExpr(right))
+ case Token("<=", left :: right:: Nil) => LessThanOrEqual(nodeToExpr(left), nodeToExpr(right))
+ case Token(LIKE(), left :: right:: Nil) => Like(nodeToExpr(left), nodeToExpr(right))
+ case Token(RLIKE(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
+ case Token(REGEXP(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
+ case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: Nil) =>
+ IsNotNull(nodeToExpr(child))
+ case Token("TOK_FUNCTION", Token("TOK_ISNULL", Nil) :: child :: Nil) =>
+ IsNull(nodeToExpr(child))
+ case Token("TOK_FUNCTION", Token(IN(), Nil) :: value :: list) =>
+ In(nodeToExpr(value), list.map(nodeToExpr))
+ case Token("TOK_FUNCTION",
+ Token(BETWEEN(), Nil) ::
+ kw ::
+ target ::
+ minValue ::
+ maxValue :: Nil) =>
+
+ val targetExpression = nodeToExpr(target)
+ val betweenExpr =
+ And(
+ GreaterThanOrEqual(targetExpression, nodeToExpr(minValue)),
+ LessThanOrEqual(targetExpression, nodeToExpr(maxValue)))
+ kw match {
+ case Token("KW_FALSE", Nil) => betweenExpr
+ case Token("KW_TRUE", Nil) => Not(betweenExpr)
+ }
+
+ /* Boolean Logic */
+ case Token(AND(), left :: right:: Nil) => And(nodeToExpr(left), nodeToExpr(right))
+ case Token(OR(), left :: right:: Nil) => Or(nodeToExpr(left), nodeToExpr(right))
+ case Token(NOT(), child :: Nil) => Not(nodeToExpr(child))
+ case Token("!", child :: Nil) => Not(nodeToExpr(child))
+
+ /* Case statements */
+ case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) =>
+ CaseWhen.createFromParser(branches.map(nodeToExpr))
+ case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) =>
+ val keyExpr = nodeToExpr(branches.head)
+ CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr))
+
+ /* Complex datatype manipulation */
+ case Token("[", child :: ordinal :: Nil) =>
+ UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal))
+
+ /* Window Functions */
+ case Token(text, args :+ Token("TOK_WINDOWSPEC", spec)) =>
+ val function = nodeToExpr(node.copy(children = node.children.init))
+ nodesToWindowSpecification(spec) match {
+ case reference: WindowSpecReference =>
+ UnresolvedWindowExpression(function, reference)
+ case definition: WindowSpecDefinition =>
+ WindowExpression(function, definition)
+ }
+
+ /* UDFs - must be last, otherwise they will preempt built-in functions */
+ case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>
+ UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = false)
+ // Aggregate function with DISTINCT keyword.
+ case Token("TOK_FUNCTIONDI", Token(name, Nil) :: args) =>
+ UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = true)
+ case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: args) =>
+ UnresolvedFunction(name, UnresolvedStar(None) :: Nil, isDistinct = false)
+
+ /* Literals */
+ case Token("TOK_NULL", Nil) => Literal.create(null, NullType)
+ case Token(TRUE(), Nil) => Literal.create(true, BooleanType)
+ case Token(FALSE(), Nil) => Literal.create(false, BooleanType)
+ case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
+ Literal(strings.map(s => ParseUtils.unescapeSQLString(s.text)).mkString)
+
+ case ast if ast.tokenType == SparkSqlParser.TinyintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toByte, ByteType)
+
+ case ast if ast.tokenType == SparkSqlParser.SmallintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toShort, ShortType)
+
+ case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)
+
+ case ast if ast.tokenType == SparkSqlParser.DoubleLiteral =>
+ Literal(ast.text.toDouble)
+
+ case ast if ast.tokenType == SparkSqlParser.Number =>
+ val text = ast.text
+ text match {
+ case INTEGRAL() =>
+ BigDecimal(text) match {
+ case v if v.isValidInt =>
+ Literal(v.intValue())
+ case v if v.isValidLong =>
+ Literal(v.longValue())
+ case v => Literal(v.underlying())
+ }
+ case DECIMAL(_*) =>
+ Literal(BigDecimal(text).underlying())
+ case _ =>
+ // Convert a scientifically notated decimal into a double.
+ Literal(text.toDouble)
+ }
+ case ast if ast.tokenType == SparkSqlParser.StringLiteral =>
+ Literal(ParseUtils.unescapeSQLString(ast.text))
+
+ case ast if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
+ Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
+
+ case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
+ Literal(CalendarInterval.fromYearMonthString(ast.children.head.text))
+
+ case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
+ Literal(CalendarInterval.fromDayTimeString(ast.children.head.text))
+
+ case Token("TOK_INTERVAL", elements) =>
+ var interval = new CalendarInterval(0, 0)
+ var updated = false
+ elements.foreach {
+ // The interval node will always contain children for all possible time units. A child node
+ // is only useful when it contains exactly one (numeric) child.
+ case e @ Token(name, Token(value, Nil) :: Nil) =>
+ val unit = name match {
+ case "TOK_INTERVAL_YEAR_LITERAL" => "year"
+ case "TOK_INTERVAL_MONTH_LITERAL" => "month"
+ case "TOK_INTERVAL_WEEK_LITERAL" => "week"
+ case "TOK_INTERVAL_DAY_LITERAL" => "day"
+ case "TOK_INTERVAL_HOUR_LITERAL" => "hour"
+ case "TOK_INTERVAL_MINUTE_LITERAL" => "minute"
+ case "TOK_INTERVAL_SECOND_LITERAL" => "second"
+ case "TOK_INTERVAL_MILLISECOND_LITERAL" => "millisecond"
+ case "TOK_INTERVAL_MICROSECOND_LITERAL" => "microsecond"
+ case _ => noParseRule(s"Interval($name)", e)
+ }
+ interval = interval.add(CalendarInterval.fromSingleUnitString(unit, value))
+ updated = true
+ case _ =>
+ }
+ if (!updated) {
+ throw new AnalysisException("at least one time unit should be given for interval literal")
+ }
+ Literal(interval)
+
+ case _ =>
+ noParseRule("Expression", node)
+ }
+
+ /* Case insensitive matches for Window Specification */
+ val PRECEDING = "(?i)preceding".r
+ val FOLLOWING = "(?i)following".r
+ val CURRENT = "(?i)current".r
+ protected def nodesToWindowSpecification(nodes: Seq[ASTNode]): WindowSpec = nodes match {
+ case Token(windowName, Nil) :: Nil =>
+ // Refer to a window spec defined in the window clause.
+ WindowSpecReference(windowName)
+ case Nil =>
+ // OVER()
+ WindowSpecDefinition(
+ partitionSpec = Nil,
+ orderSpec = Nil,
+ frameSpecification = UnspecifiedFrame)
+ case spec =>
+ val (partitionClause :: rowFrame :: rangeFrame :: Nil) =
+ getClauses(
+ Seq(
+ "TOK_PARTITIONINGSPEC",
+ "TOK_WINDOWRANGE",
+ "TOK_WINDOWVALUES"),
+ spec)
+
+ // Handle Partition By and Order By.
+ val (partitionSpec, orderSpec) = partitionClause.map { partitionAndOrdering =>
+ val (partitionByClause :: orderByClause :: sortByClause :: clusterByClause :: Nil) =
+ getClauses(
+ Seq("TOK_DISTRIBUTEBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_CLUSTERBY"),
+ partitionAndOrdering.children)
+
+ (partitionByClause, orderByClause.orElse(sortByClause), clusterByClause) match {
+ case (Some(partitionByExpr), Some(orderByExpr), None) =>
+ (partitionByExpr.children.map(nodeToExpr),
+ orderByExpr.children.map(nodeToSortOrder))
+ case (Some(partitionByExpr), None, None) =>
+ (partitionByExpr.children.map(nodeToExpr), Nil)
+ case (None, Some(orderByExpr), None) =>
+ (Nil, orderByExpr.children.map(nodeToSortOrder))
+ case (None, None, Some(clusterByExpr)) =>
+ val expressions = clusterByExpr.children.map(nodeToExpr)
+ (expressions, expressions.map(SortOrder(_, Ascending)))
+ case _ =>
+ noParseRule("Partition & Ordering", partitionAndOrdering)
+ }
+ }.getOrElse {
+ (Nil, Nil)
+ }
+
+ // Handle Window Frame
+ val windowFrame =
+ if (rowFrame.isEmpty && rangeFrame.isEmpty) {
+ UnspecifiedFrame
+ } else {
+ val frameType = rowFrame.map(_ => RowFrame).getOrElse(RangeFrame)
+ def nodeToBoundary(node: ASTNode): FrameBoundary = node match {
+ case Token(PRECEDING(), Token(count, Nil) :: Nil) =>
+ if (count.toLowerCase() == "unbounded") {
+ UnboundedPreceding
+ } else {
+ ValuePreceding(count.toInt)
+ }
+ case Token(FOLLOWING(), Token(count, Nil) :: Nil) =>
+ if (count.toLowerCase() == "unbounded") {
+ UnboundedFollowing
+ } else {
+ ValueFollowing(count.toInt)
+ }
+ case Token(CURRENT(), Nil) => CurrentRow
+ case _ =>
+ noParseRule("Window Frame Boundary", node)
+ }
+
+ rowFrame.orElse(rangeFrame).map { frame =>
+ frame.children match {
+ case precedingNode :: followingNode :: Nil =>
+ SpecifiedWindowFrame(
+ frameType,
+ nodeToBoundary(precedingNode),
+ nodeToBoundary(followingNode))
+ case precedingNode :: Nil =>
+ SpecifiedWindowFrame(frameType, nodeToBoundary(precedingNode), CurrentRow)
+ case _ =>
+ noParseRule("Window Frame", frame)
+ }
+ }.getOrElse(sys.error(s"If you see this, please file a bug report with your query."))
+ }
+
+ WindowSpecDefinition(partitionSpec, orderSpec, windowFrame)
+ }
+
+ protected def nodeToTransformation(
+ node: ASTNode,
+ child: LogicalPlan): Option[ScriptTransformation] = None
+
+ val explode = "(?i)explode".r
+ val jsonTuple = "(?i)json_tuple".r
+ protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: LogicalPlan): Generate = {
+ val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node
+
+ val alias = cleanIdentifier(getClause("TOK_TABALIAS", clauses).children.head.text)
+
+ val generator = clauses.head match {
+ case Token("TOK_FUNCTION", Token(explode(), Nil) :: childNode :: Nil) =>
+ Explode(nodeToExpr(childNode))
+ case Token("TOK_FUNCTION", Token(jsonTuple(), Nil) :: children) =>
+ JsonTuple(children.map(nodeToExpr))
+ case other =>
+ nodeToGenerator(other)
+ }
+
+ val attributes = clauses.collect {
+ case Token(a, Nil) => UnresolvedAttribute(a.toLowerCase)
+ }
+
+ Generate(generator, join = true, outer = outer, Some(alias.toLowerCase), attributes, child)
+ }
+
+ protected def nodeToGenerator(node: ASTNode): Generator = noParseRule("Generator", node)
+
+ protected def noParseRule(msg: String, node: ASTNode): Nothing = throw new NotImplementedError(
+ s"[$msg]: No parse rules for ASTNode type: ${node.tokenType}, tree:\n${node.treeString}")
+}
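As a sanity check of the bitmask arithmetic described in the extractGroupingSet scaladoc above, here is a small self-contained sketch (a hypothetical helper; plain strings stand in for the real ASTNode grouping expressions) that reproduces the grouping ids 1 and 5 from the (k1, k2, k3) example in that comment:

    // Re-derivation of the bitmap computation in extractGroupingSet: start from the
    // all-ones mask and unset bit (keys.length - 1 - index) for each key in the set.
    def groupingId(keys: Seq[String], set: Seq[String]): Int = {
      val keyIndex = keys.zipWithIndex.toMap
      val mask = (1 << keys.length) - 1
      set.foldLeft(mask) { (bitmap, col) =>
        bitmap & ~(1 << (keys.length - 1 - keyIndex(col)))
      }
    }

    val keys = Seq("k1", "k2", "k3")
    assert(groupingId(keys, Seq("k1", "k2")) == 1) // binary 001: k1 and k2 grouped
    assert(groupingId(keys, Seq("k2")) == 5)       // binary 101: only k2 grouped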
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala
new file mode 100644
index 0000000..21deb82
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import scala.language.implicitConversions
+import scala.util.matching.Regex
+import scala.util.parsing.combinator.syntactical.StandardTokenParsers
+
+import org.apache.spark.sql.types._
+
+/**
+ * This is a data type parser that can be used to parse string representations of data types
+ * provided in SQL queries. This parser is mixed in with DDLParser and SqlParser.
+ */
+private[sql] trait DataTypeParser extends StandardTokenParsers {
+
+ // This is used to create a parser from a regex. We are using regexes for data type strings
+ // since these strings can also be used as column names or field names.
+ import lexical.Identifier
+ implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch(
+ s"identifier matching regex ${regex}",
+ { case Identifier(str) if regex.unapplySeq(str).isDefined => str }
+ )
+
+ protected lazy val primitiveType: Parser[DataType] =
+ "(?i)string".r ^^^ StringType |
+ "(?i)float".r ^^^ FloatType |
+ "(?i)(?:int|integer)".r ^^^ IntegerType |
+ "(?i)tinyint".r ^^^ ByteType |
+ "(?i)smallint".r ^^^ ShortType |
+ "(?i)double".r ^^^ DoubleType |
+ "(?i)(?:bigint|long)".r ^^^ LongType |
+ "(?i)binary".r ^^^ BinaryType |
+ "(?i)boolean".r ^^^ BooleanType |
+ fixedDecimalType |
+ "(?i)decimal".r ^^^ DecimalType.USER_DEFAULT |
+ "(?i)date".r ^^^ DateType |
+ "(?i)timestamp".r ^^^ TimestampType |
+ varchar |
+ char
+
+ protected lazy val fixedDecimalType: Parser[DataType] =
+ ("(?i)decimal".r ~> "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
+ case precision ~ scale =>
+ DecimalType(precision.toInt, scale.toInt)
+ }
+
+ protected lazy val char: Parser[DataType] =
+ "(?i)char".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
+
+ protected lazy val varchar: Parser[DataType] =
+ "(?i)varchar".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
+
+ protected lazy val arrayType: Parser[DataType] =
+ "(?i)array".r ~> "<" ~> dataType <~ ">" ^^ {
+ case tpe => ArrayType(tpe)
+ }
+
+ protected lazy val mapType: Parser[DataType] =
+ "(?i)map".r ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
+ case t1 ~ _ ~ t2 => MapType(t1, t2)
+ }
+
+ protected lazy val structField: Parser[StructField] =
+ ident ~ ":" ~ dataType ^^ {
+ case name ~ _ ~ tpe => StructField(name, tpe, nullable = true)
+ }
+
+ protected lazy val structType: Parser[DataType] =
+ ("(?i)struct".r ~> "<" ~> repsep(structField, ",") <~ ">" ^^ {
+ case fields => new StructType(fields.toArray)
+ }) |
+ ("(?i)struct".r ~ "<>" ^^^ StructType(Nil))
+
+ protected lazy val dataType: Parser[DataType] =
+ arrayType |
+ mapType |
+ structType |
+ primitiveType
+
+ def toDataType(dataTypeString: String): DataType = synchronized {
+ phrase(dataType)(new lexical.Scanner(dataTypeString)) match {
+ case Success(result, _) => result
+ case failure: NoSuccess => throw new DataTypeException(failMessage(dataTypeString))
+ }
+ }
+
+ private def failMessage(dataTypeString: String): String = {
+ s"Unsupported dataType: $dataTypeString. If you have a struct and a field name of it has " +
+ "any special characters, please use backticks (`) to quote that field name, e.g. `x+y`. " +
+ "Please note that backtick itself is not supported in a field name."
+ }
+}
+
+private[sql] object DataTypeParser {
+ lazy val dataTypeParser = new DataTypeParser {
+ override val lexical = new SqlLexical
+ }
+
+ def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString)
+}
+
+/** The exception thrown from the [[DataTypeParser]]. */
+private[sql] class DataTypeException(message: String) extends Exception(message)
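A minimal usage sketch for the relocated parser (editorial illustration, assuming only
the new package path introduced by this diff; note that DataTypeParser is private[sql],
so this only compiles inside that package):

    import org.apache.spark.sql.catalyst.parser.DataTypeParser
    import org.apache.spark.sql.types._

    // Nested SQL type strings parse into the corresponding Catalyst DataType;
    // array/map nullability flags default to true, as the test suite below asserts.
    val t = DataTypeParser.parse("map<int, array<decimal(10,2)>>")
    assert(t == MapType(IntegerType, ArrayType(DecimalType(10, 2))))
    // DataTypeParser.parse("not a type")  // throws DataTypeException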
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/LegacyTypeStringParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/LegacyTypeStringParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/LegacyTypeStringParser.scala
new file mode 100644
index 0000000..60d7361
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/LegacyTypeStringParser.scala
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import scala.util.parsing.combinator.RegexParsers
+
+import org.apache.spark.sql.types._
+
+/**
+ * Parser that turns case class strings into datatypes. This is only here to maintain compatibility
+ * with Parquet files written by Spark 1.1 and below.
+ */
+object LegacyTypeStringParser extends RegexParsers {
+
+ protected lazy val primitiveType: Parser[DataType] =
+ ( "StringType" ^^^ StringType
+ | "FloatType" ^^^ FloatType
+ | "IntegerType" ^^^ IntegerType
+ | "ByteType" ^^^ ByteType
+ | "ShortType" ^^^ ShortType
+ | "DoubleType" ^^^ DoubleType
+ | "LongType" ^^^ LongType
+ | "BinaryType" ^^^ BinaryType
+ | "BooleanType" ^^^ BooleanType
+ | "DateType" ^^^ DateType
+ | "DecimalType()" ^^^ DecimalType.USER_DEFAULT
+ | fixedDecimalType
+ | "TimestampType" ^^^ TimestampType
+ )
+
+ protected lazy val fixedDecimalType: Parser[DataType] =
+ ("DecimalType(" ~> "[0-9]+".r) ~ ("," ~> "[0-9]+".r <~ ")") ^^ {
+ case precision ~ scale => DecimalType(precision.toInt, scale.toInt)
+ }
+
+ protected lazy val arrayType: Parser[DataType] =
+ "ArrayType" ~> "(" ~> dataType ~ "," ~ boolVal <~ ")" ^^ {
+ case tpe ~ _ ~ containsNull => ArrayType(tpe, containsNull)
+ }
+
+ protected lazy val mapType: Parser[DataType] =
+ "MapType" ~> "(" ~> dataType ~ "," ~ dataType ~ "," ~ boolVal <~ ")" ^^ {
+ case t1 ~ _ ~ t2 ~ _ ~ valueContainsNull => MapType(t1, t2, valueContainsNull)
+ }
+
+ protected lazy val structField: Parser[StructField] =
+ ("StructField(" ~> "[a-zA-Z0-9_]*".r) ~ ("," ~> dataType) ~ ("," ~> boolVal <~ ")") ^^ {
+ case name ~ tpe ~ nullable =>
+ StructField(name, tpe, nullable = nullable)
+ }
+
+ protected lazy val boolVal: Parser[Boolean] =
+ ( "true" ^^^ true
+ | "false" ^^^ false
+ )
+
+ protected lazy val structType: Parser[DataType] =
+ "StructType\\([A-zA-z]*\\(".r ~> repsep(structField, ",") <~ "))" ^^ {
+ case fields => StructType(fields)
+ }
+
+ protected lazy val dataType: Parser[DataType] =
+ ( arrayType
+ | mapType
+ | structType
+ | primitiveType
+ )
+
+ /**
+ * Parses a string representation of a DataType.
+ */
+ def parse(asString: String): DataType = parseAll(dataType, asString) match {
+ case Success(result, _) => result
+ case failure: NoSuccess =>
+ throw new IllegalArgumentException(s"Unsupported dataType: $asString, $failure")
+ }
+}
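A sketch of the legacy string format this parser accepts, i.e. the old case-class
rendering of a schema that Spark 1.1-era Parquet metadata recorded (the example
string is illustrative):

    import org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser
    import org.apache.spark.sql.types._

    val legacy =
      "StructType(List(StructField(a,IntegerType,true)," +
        "StructField(b,ArrayType(DoubleType,true),false)))"
    // Yields a StructType with a nullable int field `a` and a non-nullable
    // array<double> field `b`.
    val schema: DataType = LegacyTypeStringParser.parse(legacy)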
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
new file mode 100644
index 0000000..7f35d65
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+/**
+ * Interface for a parser.
+ */
+trait ParserInterface {
+ /** Creates LogicalPlan for a given SQL string. */
+ def parsePlan(sqlText: String): LogicalPlan
+
+ /** Creates Expression for a given SQL string. */
+ def parseExpression(sqlText: String): Expression
+
+ /** Creates TableIdentifier for a given SQL string. */
+ def parseTableIdentifier(sqlText: String): TableIdentifier
+}
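Since ParserInterface is the seam between callers and concrete parsers such as
CatalystQl, callers can stay implementation-agnostic; a brief sketch (the helper
name is hypothetical):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.parser.ParserInterface

    // Accepts any ParserInterface implementation, e.g. new CatalystQl().
    def resolveTable(parser: ParserInterface, sql: String): TableIdentifier =
      parser.parseTableIdentifier(sql)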
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DataTypeParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DataTypeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DataTypeParser.scala
deleted file mode 100644
index 515c071..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DataTypeParser.scala
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.util
-
-import scala.language.implicitConversions
-import scala.util.matching.Regex
-import scala.util.parsing.combinator.syntactical.StandardTokenParsers
-
-import org.apache.spark.sql.catalyst.SqlLexical
-import org.apache.spark.sql.types._
-
-/**
- * This is a data type parser that can be used to parse string representations of data types
- * provided in SQL queries. This parser is mixed in with DDLParser and SqlParser.
- */
-private[sql] trait DataTypeParser extends StandardTokenParsers {
-
- // This is used to create a parser from a regex. We are using regexes for data type strings
- // since these strings can also be used as column names or field names.
- import lexical.Identifier
- implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch(
- s"identifier matching regex ${regex}",
- { case Identifier(str) if regex.unapplySeq(str).isDefined => str }
- )
-
- protected lazy val primitiveType: Parser[DataType] =
- "(?i)string".r ^^^ StringType |
- "(?i)float".r ^^^ FloatType |
- "(?i)(?:int|integer)".r ^^^ IntegerType |
- "(?i)tinyint".r ^^^ ByteType |
- "(?i)smallint".r ^^^ ShortType |
- "(?i)double".r ^^^ DoubleType |
- "(?i)(?:bigint|long)".r ^^^ LongType |
- "(?i)binary".r ^^^ BinaryType |
- "(?i)boolean".r ^^^ BooleanType |
- fixedDecimalType |
- "(?i)decimal".r ^^^ DecimalType.USER_DEFAULT |
- "(?i)date".r ^^^ DateType |
- "(?i)timestamp".r ^^^ TimestampType |
- varchar |
- char
-
- protected lazy val fixedDecimalType: Parser[DataType] =
- ("(?i)decimal".r ~> "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
- case precision ~ scale =>
- DecimalType(precision.toInt, scale.toInt)
- }
-
- protected lazy val char: Parser[DataType] =
- "(?i)char".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
-
- protected lazy val varchar: Parser[DataType] =
- "(?i)varchar".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
-
- protected lazy val arrayType: Parser[DataType] =
- "(?i)array".r ~> "<" ~> dataType <~ ">" ^^ {
- case tpe => ArrayType(tpe)
- }
-
- protected lazy val mapType: Parser[DataType] =
- "(?i)map".r ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
- case t1 ~ _ ~ t2 => MapType(t1, t2)
- }
-
- protected lazy val structField: Parser[StructField] =
- ident ~ ":" ~ dataType ^^ {
- case name ~ _ ~ tpe => StructField(name, tpe, nullable = true)
- }
-
- protected lazy val structType: Parser[DataType] =
- ("(?i)struct".r ~> "<" ~> repsep(structField, ",") <~ ">" ^^ {
- case fields => new StructType(fields.toArray)
- }) |
- ("(?i)struct".r ~ "<>" ^^^ StructType(Nil))
-
- protected lazy val dataType: Parser[DataType] =
- arrayType |
- mapType |
- structType |
- primitiveType
-
- def toDataType(dataTypeString: String): DataType = synchronized {
- phrase(dataType)(new lexical.Scanner(dataTypeString)) match {
- case Success(result, _) => result
- case failure: NoSuccess => throw new DataTypeException(failMessage(dataTypeString))
- }
- }
-
- private def failMessage(dataTypeString: String): String = {
- s"Unsupported dataType: $dataTypeString. If you have a struct and a field name of it has " +
- "any special characters, please use backticks (`) to quote that field name, e.g. `x+y`. " +
- "Please note that backtick itself is not supported in a field name."
- }
-}
-
-private[sql] object DataTypeParser {
- lazy val dataTypeParser = new DataTypeParser {
- override val lexical = new SqlLexical
- }
-
- def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString)
-}
-
-/** The exception thrown from the [[DataTypeParser]]. */
-private[sql] class DataTypeException(message: String) extends Exception(message)
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/LegacyTypeStringParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/LegacyTypeStringParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/LegacyTypeStringParser.scala
deleted file mode 100644
index e27cf9c..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/LegacyTypeStringParser.scala
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.util
-
-import scala.util.parsing.combinator.RegexParsers
-
-import org.apache.spark.sql.types._
-
-/**
- * Parser that turns case class strings into datatypes. This is only here to maintain compatibility
- * with Parquet files written by Spark 1.1 and below.
- */
-object LegacyTypeStringParser extends RegexParsers {
-
- protected lazy val primitiveType: Parser[DataType] =
- ( "StringType" ^^^ StringType
- | "FloatType" ^^^ FloatType
- | "IntegerType" ^^^ IntegerType
- | "ByteType" ^^^ ByteType
- | "ShortType" ^^^ ShortType
- | "DoubleType" ^^^ DoubleType
- | "LongType" ^^^ LongType
- | "BinaryType" ^^^ BinaryType
- | "BooleanType" ^^^ BooleanType
- | "DateType" ^^^ DateType
- | "DecimalType()" ^^^ DecimalType.USER_DEFAULT
- | fixedDecimalType
- | "TimestampType" ^^^ TimestampType
- )
-
- protected lazy val fixedDecimalType: Parser[DataType] =
- ("DecimalType(" ~> "[0-9]+".r) ~ ("," ~> "[0-9]+".r <~ ")") ^^ {
- case precision ~ scale => DecimalType(precision.toInt, scale.toInt)
- }
-
- protected lazy val arrayType: Parser[DataType] =
- "ArrayType" ~> "(" ~> dataType ~ "," ~ boolVal <~ ")" ^^ {
- case tpe ~ _ ~ containsNull => ArrayType(tpe, containsNull)
- }
-
- protected lazy val mapType: Parser[DataType] =
- "MapType" ~> "(" ~> dataType ~ "," ~ dataType ~ "," ~ boolVal <~ ")" ^^ {
- case t1 ~ _ ~ t2 ~ _ ~ valueContainsNull => MapType(t1, t2, valueContainsNull)
- }
-
- protected lazy val structField: Parser[StructField] =
- ("StructField(" ~> "[a-zA-Z0-9_]*".r) ~ ("," ~> dataType) ~ ("," ~> boolVal <~ ")") ^^ {
- case name ~ tpe ~ nullable =>
- StructField(name, tpe, nullable = nullable)
- }
-
- protected lazy val boolVal: Parser[Boolean] =
- ( "true" ^^^ true
- | "false" ^^^ false
- )
-
- protected lazy val structType: Parser[DataType] =
- "StructType\\([A-zA-z]*\\(".r ~> repsep(structField, ",") <~ "))" ^^ {
- case fields => StructType(fields)
- }
-
- protected lazy val dataType: Parser[DataType] =
- ( arrayType
- | mapType
- | structType
- | primitiveType
- )
-
- /**
- * Parses a string representation of a DataType.
- */
- def parse(asString: String): DataType = parseAll(dataType, asString) match {
- case Success(result, _) => result
- case failure: NoSuccess =>
- throw new IllegalArgumentException(s"Unsupported dataType: $asString, $failure")
- }
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 271ca95..1238eef 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -25,7 +25,8 @@ import org.json4s.JsonDSL._
import org.apache.spark.SparkException
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering}
-import org.apache.spark.sql.catalyst.util.{quoteIdentifier, DataTypeParser, LegacyTypeStringParser}
+import org.apache.spark.sql.catalyst.parser.{DataTypeParser, LegacyTypeStringParser}
+import org.apache.spark.sql.catalyst.util.quoteIdentifier
/**
* :: DeveloperApi ::
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
deleted file mode 100644
index 53a8d6e..0000000
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.unsafe.types.CalendarInterval
-
-class CatalystQlSuite extends PlanTest {
- val parser = new CatalystQl()
-
- test("test case insensitive") {
- val result = Project(UnresolvedAlias(Literal(1)):: Nil, OneRowRelation)
- assert(result === parser.parsePlan("seLect 1"))
- assert(result === parser.parsePlan("select 1"))
- assert(result === parser.parsePlan("SELECT 1"))
- }
-
- test("test NOT operator with comparison operations") {
- val parsed = parser.parsePlan("SELECT NOT TRUE > TRUE")
- val expected = Project(
- UnresolvedAlias(
- Not(
- GreaterThan(Literal(true), Literal(true)))
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- test("test Union Distinct operator") {
- val parsed1 = parser.parsePlan("SELECT * FROM t0 UNION SELECT * FROM t1")
- val parsed2 = parser.parsePlan("SELECT * FROM t0 UNION DISTINCT SELECT * FROM t1")
- val expected =
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- SubqueryAlias("u_1",
- Distinct(
- Union(
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- UnresolvedRelation(TableIdentifier("t0"), None)),
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- UnresolvedRelation(TableIdentifier("t1"), None))))))
- comparePlans(parsed1, expected)
- comparePlans(parsed2, expected)
- }
-
- test("test Union All operator") {
- val parsed = parser.parsePlan("SELECT * FROM t0 UNION ALL SELECT * FROM t1")
- val expected =
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- SubqueryAlias("u_1",
- Union(
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- UnresolvedRelation(TableIdentifier("t0"), None)),
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- UnresolvedRelation(TableIdentifier("t1"), None)))))
- comparePlans(parsed, expected)
- }
-
- test("support hive interval literal") {
- def checkInterval(sql: String, result: CalendarInterval): Unit = {
- val parsed = parser.parsePlan(sql)
- val expected = Project(
- UnresolvedAlias(
- Literal(result)
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- def checkYearMonth(lit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' YEAR TO MONTH",
- CalendarInterval.fromYearMonthString(lit))
- }
-
- def checkDayTime(lit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' DAY TO SECOND",
- CalendarInterval.fromDayTimeString(lit))
- }
-
- def checkSingleUnit(lit: String, unit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' $unit",
- CalendarInterval.fromSingleUnitString(unit, lit))
- }
-
- checkYearMonth("123-10")
- checkYearMonth("496-0")
- checkYearMonth("-2-3")
- checkYearMonth("-123-0")
-
- checkDayTime("99 11:22:33.123456789")
- checkDayTime("-99 11:22:33.123456789")
- checkDayTime("10 9:8:7.123456789")
- checkDayTime("1 0:0:0")
- checkDayTime("-1 0:0:0")
- checkDayTime("1 0:0:1")
-
- for (unit <- Seq("year", "month", "day", "hour", "minute", "second")) {
- checkSingleUnit("7", unit)
- checkSingleUnit("-7", unit)
- checkSingleUnit("0", unit)
- }
-
- checkSingleUnit("13.123456789", "second")
- checkSingleUnit("-13.123456789", "second")
- }
-
- test("support scientific notation") {
- def assertRight(input: String, output: Double): Unit = {
- val parsed = parser.parsePlan("SELECT " + input)
- val expected = Project(
- UnresolvedAlias(
- Literal(output)
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- assertRight("9.0e1", 90)
- assertRight(".9e+2", 90)
- assertRight("0.9e+2", 90)
- assertRight("900e-1", 90)
- assertRight("900.0E-1", 90)
- assertRight("9.e+1", 90)
-
- intercept[AnalysisException](parser.parsePlan("SELECT .e3"))
- }
-
- test("parse expressions") {
- compareExpressions(
- parser.parseExpression("prinln('hello', 'world')"),
- UnresolvedFunction(
- "prinln", Literal("hello") :: Literal("world") :: Nil, false))
-
- compareExpressions(
- parser.parseExpression("1 + r.r As q"),
- Alias(Add(Literal(1), UnresolvedAttribute("r.r")), "q")())
-
- compareExpressions(
- parser.parseExpression("1 - f('o', o(bar))"),
- Subtract(Literal(1),
- UnresolvedFunction("f",
- Literal("o") ::
- UnresolvedFunction("o", UnresolvedAttribute("bar") :: Nil, false) ::
- Nil, false)))
-
- intercept[AnalysisException](parser.parseExpression("1 - f('o', o(bar)) hello * world"))
- }
-
- test("table identifier") {
- assert(TableIdentifier("q") === parser.parseTableIdentifier("q"))
- assert(TableIdentifier("q", Some("d")) === parser.parseTableIdentifier("d.q"))
- intercept[AnalysisException](parser.parseTableIdentifier(""))
- intercept[AnalysisException](parser.parseTableIdentifier("d.q.g"))
- }
-
- test("parse union/except/intersect") {
- parser.parsePlan("select * from t1 union all select * from t2")
- parser.parsePlan("select * from t1 union distinct select * from t2")
- parser.parsePlan("select * from t1 union select * from t2")
- parser.parsePlan("select * from t1 except select * from t2")
- parser.parsePlan("select * from t1 intersect select * from t2")
- parser.parsePlan("(select * from t1) union all (select * from t2)")
- parser.parsePlan("(select * from t1) union distinct (select * from t2)")
- parser.parsePlan("(select * from t1) union (select * from t2)")
- parser.parsePlan("select * from ((select * from t1) union (select * from t2)) t")
- }
-
- test("window function: better support of parentheses") {
- parser.parsePlan("select sum(product + 1) over (partition by ((1) + (product / 2)) " +
- "order by 2) from windowData")
- parser.parsePlan("select sum(product + 1) over (partition by (1 + (product / 2)) " +
- "order by 2) from windowData")
- parser.parsePlan("select sum(product + 1) over (partition by ((product / 2) + 1) " +
- "order by 2) from windowData")
-
- parser.parsePlan("select sum(product + 1) over (partition by ((product) + (1)) order by 2) " +
- "from windowData")
- parser.parsePlan("select sum(product + 1) over (partition by ((product) + 1) order by 2) " +
- "from windowData")
- parser.parsePlan("select sum(product + 1) over (partition by (product + (1)) order by 2) " +
- "from windowData")
- }
-
- test("subquery") {
- parser.parsePlan("select (select max(b) from s) ss from t")
- parser.parsePlan("select * from t where a = (select b from s)")
- parser.parsePlan("select * from t group by g having a > (select b from s)")
- }
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
new file mode 100644
index 0000000..0660791
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.PlanTest
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.unsafe.types.CalendarInterval
+
+class CatalystQlSuite extends PlanTest {
+ val parser = new CatalystQl()
+
+ test("test case insensitive") {
+ val result = Project(UnresolvedAlias(Literal(1)):: Nil, OneRowRelation)
+ assert(result === parser.parsePlan("seLect 1"))
+ assert(result === parser.parsePlan("select 1"))
+ assert(result === parser.parsePlan("SELECT 1"))
+ }
+
+ test("test NOT operator with comparison operations") {
+ val parsed = parser.parsePlan("SELECT NOT TRUE > TRUE")
+ val expected = Project(
+ UnresolvedAlias(
+ Not(
+ GreaterThan(Literal(true), Literal(true)))
+ ) :: Nil,
+ OneRowRelation)
+ comparePlans(parsed, expected)
+ }
+
+ test("test Union Distinct operator") {
+ val parsed1 = parser.parsePlan("SELECT * FROM t0 UNION SELECT * FROM t1")
+ val parsed2 = parser.parsePlan("SELECT * FROM t0 UNION DISTINCT SELECT * FROM t1")
+ val expected =
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ SubqueryAlias("u_1",
+ Distinct(
+ Union(
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t0"), None)),
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t1"), None))))))
+ comparePlans(parsed1, expected)
+ comparePlans(parsed2, expected)
+ }
+
+ test("test Union All operator") {
+ val parsed = parser.parsePlan("SELECT * FROM t0 UNION ALL SELECT * FROM t1")
+ val expected =
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ SubqueryAlias("u_1",
+ Union(
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t0"), None)),
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t1"), None)))))
+ comparePlans(parsed, expected)
+ }
+
+ test("support hive interval literal") {
+ def checkInterval(sql: String, result: CalendarInterval): Unit = {
+ val parsed = parser.parsePlan(sql)
+ val expected = Project(
+ UnresolvedAlias(
+ Literal(result)
+ ) :: Nil,
+ OneRowRelation)
+ comparePlans(parsed, expected)
+ }
+
+ def checkYearMonth(lit: String): Unit = {
+ checkInterval(
+ s"SELECT INTERVAL '$lit' YEAR TO MONTH",
+ CalendarInterval.fromYearMonthString(lit))
+ }
+
+ def checkDayTime(lit: String): Unit = {
+ checkInterval(
+ s"SELECT INTERVAL '$lit' DAY TO SECOND",
+ CalendarInterval.fromDayTimeString(lit))
+ }
+
+ def checkSingleUnit(lit: String, unit: String): Unit = {
+ checkInterval(
+ s"SELECT INTERVAL '$lit' $unit",
+ CalendarInterval.fromSingleUnitString(unit, lit))
+ }
+
+ checkYearMonth("123-10")
+ checkYearMonth("496-0")
+ checkYearMonth("-2-3")
+ checkYearMonth("-123-0")
+
+ checkDayTime("99 11:22:33.123456789")
+ checkDayTime("-99 11:22:33.123456789")
+ checkDayTime("10 9:8:7.123456789")
+ checkDayTime("1 0:0:0")
+ checkDayTime("-1 0:0:0")
+ checkDayTime("1 0:0:1")
+
+ for (unit <- Seq("year", "month", "day", "hour", "minute", "second")) {
+ checkSingleUnit("7", unit)
+ checkSingleUnit("-7", unit)
+ checkSingleUnit("0", unit)
+ }
+
+ checkSingleUnit("13.123456789", "second")
+ checkSingleUnit("-13.123456789", "second")
+ }
+
+ test("support scientific notation") {
+ def assertRight(input: String, output: Double): Unit = {
+ val parsed = parser.parsePlan("SELECT " + input)
+ val expected = Project(
+ UnresolvedAlias(
+ Literal(output)
+ ) :: Nil,
+ OneRowRelation)
+ comparePlans(parsed, expected)
+ }
+
+ assertRight("9.0e1", 90)
+ assertRight(".9e+2", 90)
+ assertRight("0.9e+2", 90)
+ assertRight("900e-1", 90)
+ assertRight("900.0E-1", 90)
+ assertRight("9.e+1", 90)
+
+ intercept[AnalysisException](parser.parsePlan("SELECT .e3"))
+ }
+
+ test("parse expressions") {
+ compareExpressions(
+ parser.parseExpression("prinln('hello', 'world')"),
+ UnresolvedFunction(
+ "prinln", Literal("hello") :: Literal("world") :: Nil, false))
+
+ compareExpressions(
+ parser.parseExpression("1 + r.r As q"),
+ Alias(Add(Literal(1), UnresolvedAttribute("r.r")), "q")())
+
+ compareExpressions(
+ parser.parseExpression("1 - f('o', o(bar))"),
+ Subtract(Literal(1),
+ UnresolvedFunction("f",
+ Literal("o") ::
+ UnresolvedFunction("o", UnresolvedAttribute("bar") :: Nil, false) ::
+ Nil, false)))
+
+ intercept[AnalysisException](parser.parseExpression("1 - f('o', o(bar)) hello * world"))
+ }
+
+ test("table identifier") {
+ assert(TableIdentifier("q") === parser.parseTableIdentifier("q"))
+ assert(TableIdentifier("q", Some("d")) === parser.parseTableIdentifier("d.q"))
+ intercept[AnalysisException](parser.parseTableIdentifier(""))
+ intercept[AnalysisException](parser.parseTableIdentifier("d.q.g"))
+ }
+
+ test("parse union/except/intersect") {
+ parser.parsePlan("select * from t1 union all select * from t2")
+ parser.parsePlan("select * from t1 union distinct select * from t2")
+ parser.parsePlan("select * from t1 union select * from t2")
+ parser.parsePlan("select * from t1 except select * from t2")
+ parser.parsePlan("select * from t1 intersect select * from t2")
+ parser.parsePlan("(select * from t1) union all (select * from t2)")
+ parser.parsePlan("(select * from t1) union distinct (select * from t2)")
+ parser.parsePlan("(select * from t1) union (select * from t2)")
+ parser.parsePlan("select * from ((select * from t1) union (select * from t2)) t")
+ }
+
+ test("window function: better support of parentheses") {
+ parser.parsePlan("select sum(product + 1) over (partition by ((1) + (product / 2)) " +
+ "order by 2) from windowData")
+ parser.parsePlan("select sum(product + 1) over (partition by (1 + (product / 2)) " +
+ "order by 2) from windowData")
+ parser.parsePlan("select sum(product + 1) over (partition by ((product / 2) + 1) " +
+ "order by 2) from windowData")
+
+ parser.parsePlan("select sum(product + 1) over (partition by ((product) + (1)) order by 2) " +
+ "from windowData")
+ parser.parsePlan("select sum(product + 1) over (partition by ((product) + 1) order by 2) " +
+ "from windowData")
+ parser.parsePlan("select sum(product + 1) over (partition by (product + (1)) order by 2) " +
+ "from windowData")
+ }
+
+ test("subquery") {
+ parser.parsePlan("select (select max(b) from s) ss from t")
+ parser.parsePlan("select * from t where a = (select b from s)")
+ parser.parsePlan("select * from t group by g having a > (select b from s)")
+ }
+}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
new file mode 100644
index 0000000..7d36080
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -0,0 +1,123 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.types._
+
+class DataTypeParserSuite extends SparkFunSuite {
+
+ def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
+ test(s"parse ${dataTypeString.replace("\n", "")}") {
+ assert(DataTypeParser.parse(dataTypeString) === expectedDataType)
+ }
+ }
+
+ def unsupported(dataTypeString: String): Unit = {
+ test(s"$dataTypeString is not supported") {
+ intercept[DataTypeException](DataTypeParser.parse(dataTypeString))
+ }
+ }
+
+ checkDataType("int", IntegerType)
+ checkDataType("integer", IntegerType)
+ checkDataType("BooLean", BooleanType)
+ checkDataType("tinYint", ByteType)
+ checkDataType("smallINT", ShortType)
+ checkDataType("INT", IntegerType)
+ checkDataType("INTEGER", IntegerType)
+ checkDataType("bigint", LongType)
+ checkDataType("float", FloatType)
+ checkDataType("dOUBle", DoubleType)
+ checkDataType("decimal(10, 5)", DecimalType(10, 5))
+ checkDataType("decimal", DecimalType.USER_DEFAULT)
+ checkDataType("DATE", DateType)
+ checkDataType("timestamp", TimestampType)
+ checkDataType("string", StringType)
+ checkDataType("ChaR(5)", StringType)
+ checkDataType("varchAr(20)", StringType)
+ checkDataType("cHaR(27)", StringType)
+ checkDataType("BINARY", BinaryType)
+
+ checkDataType("array<doublE>", ArrayType(DoubleType, true))
+ checkDataType("Array<map<int, tinYint>>", ArrayType(MapType(IntegerType, ByteType, true), true))
+ checkDataType(
+ "array<struct<tinYint:tinyint>>",
+ ArrayType(StructType(StructField("tinYint", ByteType, true) :: Nil), true)
+ )
+ checkDataType("MAP<int, STRING>", MapType(IntegerType, StringType, true))
+ checkDataType("MAp<int, ARRAY<double>>", MapType(IntegerType, ArrayType(DoubleType), true))
+ checkDataType(
+ "MAP<int, struct<varchar:string>>",
+ MapType(IntegerType, StructType(StructField("varchar", StringType, true) :: Nil), true)
+ )
+
+ checkDataType(
+ "struct<intType: int, ts:timestamp>",
+ StructType(
+ StructField("intType", IntegerType, true) ::
+ StructField("ts", TimestampType, true) :: Nil)
+ )
+ // It is fine to use the data type string as the column name.
+ checkDataType(
+ "Struct<int: int, timestamp:timestamp>",
+ StructType(
+ StructField("int", IntegerType, true) ::
+ StructField("timestamp", TimestampType, true) :: Nil)
+ )
+ checkDataType(
+ """
+ |struct<
+ | struct:struct<deciMal:DECimal, anotherDecimal:decimAL(5,2)>,
+ | MAP:Map<timestamp, varchar(10)>,
+ | arrAy:Array<double>,
+ | anotherArray:Array<char(9)>>
+ """.stripMargin,
+ StructType(
+ StructField("struct",
+ StructType(
+ StructField("deciMal", DecimalType.USER_DEFAULT, true) ::
+ StructField("anotherDecimal", DecimalType(5, 2), true) :: Nil), true) ::
+ StructField("MAP", MapType(TimestampType, StringType), true) ::
+ StructField("arrAy", ArrayType(DoubleType, true), true) ::
+ StructField("anotherArray", ArrayType(StringType, true), true) :: Nil)
+ )
+ // A column name can be a reserved word in our DDL parser and SqlParser.
+ checkDataType(
+ "Struct<TABLE: string, CASE:boolean>",
+ StructType(
+ StructField("TABLE", StringType, true) ::
+ StructField("CASE", BooleanType, true) :: Nil)
+ )
+ // Use backticks to quote column names having special characters.
+ checkDataType(
+ "struct<`x+y`:int, `!@#$%^&*()`:string, `1_2.345<>:\"`:varchar(20)>",
+ StructType(
+ StructField("x+y", IntegerType, true) ::
+ StructField("!@#$%^&*()", StringType, true) ::
+ StructField("1_2.345<>:\"", StringType, true) :: Nil)
+ )
+ // Empty struct.
+ checkDataType("strUCt<>", StructType(Nil))
+
+ unsupported("it is not a data type")
+ unsupported("struct<x+y: int, 1.1:timestamp>")
+ unsupported("struct<x: int")
+ unsupported("struct<x int, y string>")
+ unsupported("struct<`x``y` int>")
+}
[3/3] spark git commit: [SPARK-13633][SQL] Move things into catalyst.parser package
Posted by an...@apache.org.
[SPARK-13633][SQL] Move things into catalyst.parser package
## What changes were proposed in this pull request?
This patch simply moves things to existing package `o.a.s.sql.catalyst.parser` in an effort to reduce the size of the diff in #11048. This is conceptually the same as a recently merged patch #11482.
## How was this patch tested?
Jenkins.
Author: Andrew Or <an...@databricks.com>
Closes #11506 from andrewor14/parser-package.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b7d41474
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b7d41474
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b7d41474
Branch: refs/heads/master
Commit: b7d41474216787e9cd38c04a15c43d5d02f02f93
Parents: 83302c3
Author: Andrew Or <an...@databricks.com>
Authored: Fri Mar 4 10:32:00 2016 -0800
Committer: Andrew Or <an...@databricks.com>
Committed: Fri Mar 4 10:32:00 2016 -0800
----------------------------------------------------------------------
.../sql/catalyst/AbstractSparkSQLParser.scala | 145 ---
.../apache/spark/sql/catalyst/CatalystQl.scala | 1010 -----------------
.../spark/sql/catalyst/ParserInterface.scala | 35 -
.../parser/AbstractSparkSQLParser.scala | 145 +++
.../spark/sql/catalyst/parser/CatalystQl.scala | 1012 ++++++++++++++++++
.../sql/catalyst/parser/DataTypeParser.scala | 119 ++
.../parser/LegacyTypeStringParser.scala | 92 ++
.../sql/catalyst/parser/ParserInterface.scala | 36 +
.../sql/catalyst/util/DataTypeParser.scala | 120 ---
.../catalyst/util/LegacyTypeStringParser.scala | 92 --
.../org/apache/spark/sql/types/StructType.scala | 3 +-
.../spark/sql/catalyst/CatalystQlSuite.scala | 210 ----
.../sql/catalyst/parser/CatalystQlSuite.scala | 211 ++++
.../catalyst/parser/DataTypeParserSuite.scala | 123 +++
.../sql/catalyst/util/DataTypeParserSuite.scala | 123 ---
.../scala/org/apache/spark/sql/Column.scala | 3 +-
.../scala/org/apache/spark/sql/SQLContext.scala | 1 +
.../apache/spark/sql/execution/SparkQl.scala | 4 +-
.../datasources/parquet/ParquetRelation.scala | 2 +-
.../scala/org/apache/spark/sql/functions.scala | 3 +-
.../spark/sql/internal/SessionState.scala | 2 +-
.../spark/sql/hive/HiveMetastoreCatalog.scala | 2 +-
.../spark/sql/hive/HiveSessionState.scala | 2 +-
23 files changed, 1751 insertions(+), 1744 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
deleted file mode 100644
index 38fa5cb..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import scala.language.implicitConversions
-import scala.util.parsing.combinator.lexical.StdLexical
-import scala.util.parsing.combinator.syntactical.StandardTokenParsers
-import scala.util.parsing.combinator.PackratParsers
-import scala.util.parsing.input.CharArrayReader.EofCh
-
-import org.apache.spark.sql.catalyst.plans.logical._
-
-private[sql] abstract class AbstractSparkSQLParser
- extends StandardTokenParsers with PackratParsers with ParserInterface {
-
- def parsePlan(input: String): LogicalPlan = synchronized {
- // Initialize the Keywords.
- initLexical
- phrase(start)(new lexical.Scanner(input)) match {
- case Success(plan, _) => plan
- case failureOrError => sys.error(failureOrError.toString)
- }
- }
- /* One-time initialization of lexical. This avoids reinitializing lexical in the parse method. */
- protected lazy val initLexical: Unit = lexical.initialize(reservedWords)
-
- protected case class Keyword(str: String) {
- def normalize: String = lexical.normalizeKeyword(str)
- def parser: Parser[String] = normalize
- }
-
- protected implicit def asParser(k: Keyword): Parser[String] = k.parser
-
- // By default, use reflection to find the reserved words defined in the subclass.
- // NOTE: since the Keyword properties are defined by the subclass, we cannot call this
- // method during parent class instantiation, because the subclass instance has not
- // been created yet.
- protected lazy val reservedWords: Seq[String] =
- this
- .getClass
- .getMethods
- .filter(_.getReturnType == classOf[Keyword])
- .map(_.invoke(this).asInstanceOf[Keyword].normalize)
-
- // Set the keywords as empty by default, will change that later.
- override val lexical = new SqlLexical
-
- protected def start: Parser[LogicalPlan]
-
- // Returns the whole input string
- protected lazy val wholeInput: Parser[String] = new Parser[String] {
- def apply(in: Input): ParseResult[String] =
- Success(in.source.toString, in.drop(in.source.length()))
- }
-
- // Returns the rest of the input string that are not parsed yet
- protected lazy val restInput: Parser[String] = new Parser[String] {
- def apply(in: Input): ParseResult[String] =
- Success(
- in.source.subSequence(in.offset, in.source.length()).toString,
- in.drop(in.source.length()))
- }
-}
-
-class SqlLexical extends StdLexical {
- case class DecimalLit(chars: String) extends Token {
- override def toString: String = chars
- }
-
- /* This is a workaround to support the lazy setting */
- def initialize(keywords: Seq[String]): Unit = {
- reserved.clear()
- reserved ++= keywords
- }
-
- /* Normalize the keyword string */
- def normalizeKeyword(str: String): String = str.toLowerCase
-
- delimiters += (
- "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
- ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
- )
-
- protected override def processIdent(name: String) = {
- val token = normalizeKeyword(name)
- if (reserved contains token) Keyword(token) else Identifier(name)
- }
-
- override lazy val token: Parser[Token] =
- ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) }
- | '.' ~> (rep1(digit) ~ scientificNotation) ^^
- { case i ~ s => DecimalLit("0." + i.mkString + s) }
- | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^
- { case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) }
- | digit.* ~ identChar ~ (identChar | digit).* ^^
- { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
- | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
- case i ~ None => NumericLit(i.mkString)
- case i ~ Some(d) => DecimalLit(i.mkString + "." + d.mkString)
- }
- | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
- { case chars => StringLit(chars mkString "") }
- | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
- { case chars => StringLit(chars mkString "") }
- | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
- { case chars => Identifier(chars mkString "") }
- | EofCh ^^^ EOF
- | '\'' ~> failure("unclosed string literal")
- | '"' ~> failure("unclosed string literal")
- | delim
- | failure("illegal character")
- )
-
- override def identChar: Parser[Elem] = letter | elem('_')
-
- private lazy val scientificNotation: Parser[String] =
- (elem('e') | elem('E')) ~> (elem('+') | elem('-')).? ~ rep1(digit) ^^ {
- case s ~ rest => "e" + s.mkString + rest.mkString
- }
-
- override def whitespace: Parser[Any] =
- ( whitespaceChar
- | '/' ~ '*' ~ comment
- | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
- | '#' ~ chrExcept(EofCh, '\n').*
- | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
- | '/' ~ '*' ~ failure("unclosed comment")
- ).*
-}
-
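Per the diffstat above, this file reappears unchanged under catalyst.parser. A sketch
of the subclassing pattern it supports (TinyParser is hypothetical; the two unused
ParserInterface methods are stubbed out):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.expressions.Expression
    import org.apache.spark.sql.catalyst.parser.AbstractSparkSQLParser
    import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}

    class TinyParser extends AbstractSparkSQLParser {
      // Keyword members are found reflectively and seed the lexer's reserved words.
      protected val SELECT = Keyword("SELECT")
      override protected def start: Parser[LogicalPlan] = SELECT ^^^ OneRowRelation
      override def parseExpression(sql: String): Expression =
        sys.error("not supported in this sketch")
      override def parseTableIdentifier(sql: String): TableIdentifier =
        sys.error("not supported in this sketch")
    }

    // new TinyParser().parsePlan("SELECT")  // keywords match case-insensitively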
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
deleted file mode 100644
index a0a56d7..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
+++ /dev/null
@@ -1,1010 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.catalyst
-
-import java.sql.Date
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.Count
-import org.apache.spark.sql.catalyst.parser._
-import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval
-import org.apache.spark.util.random.RandomSampler
-
-/**
- * This class translates SQL to Catalyst [[LogicalPlan]]s or [[Expression]]s.
- */
-private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends ParserInterface {
- object Token {
- def unapply(node: ASTNode): Some[(String, List[ASTNode])] = {
- CurrentOrigin.setPosition(node.line, node.positionInLine)
- node.pattern
- }
- }
-
- /**
- * The safeParse method allows a user to focus on the parsing/AST transformation logic. This
- * method will take care of possible errors during the parsing process.
- */
- protected def safeParse[T](sql: String, ast: ASTNode)(toResult: ASTNode => T): T = {
- try {
- toResult(ast)
- } catch {
- case e: MatchError => throw e
- case e: AnalysisException => throw e
- case e: Exception =>
- throw new AnalysisException(e.getMessage)
- case e: NotImplementedError =>
- throw new AnalysisException(
- s"""Unsupported language features in query
- |== SQL ==
- |$sql
- |== AST ==
- |${ast.treeString}
- |== Error ==
- |$e
- |== Stacktrace ==
- |${e.getStackTrace.head}
- """.stripMargin)
- }
- }
-
- /** Creates LogicalPlan for a given SQL string. */
- def parsePlan(sql: String): LogicalPlan =
- safeParse(sql, ParseDriver.parsePlan(sql, conf))(nodeToPlan)
-
- /** Creates Expression for a given SQL string. */
- def parseExpression(sql: String): Expression =
- safeParse(sql, ParseDriver.parseExpression(sql, conf))(selExprNodeToExpr(_).get)
-
- /** Creates TableIdentifier for a given SQL string. */
- def parseTableIdentifier(sql: String): TableIdentifier =
- safeParse(sql, ParseDriver.parseTableName(sql, conf))(extractTableIdent)
-
- def parseDdl(sql: String): Seq[Attribute] = {
- safeParse(sql, ParseDriver.parseExpression(sql, conf)) { ast =>
- val Token("TOK_CREATETABLE", children) = ast
- children
- .find(_.text == "TOK_TABCOLLIST")
- .getOrElse(sys.error("No columnList!"))
- .flatMap(_.children.map(nodeToAttribute))
- }
- }
-
- protected def getClauses(
- clauseNames: Seq[String],
- nodeList: Seq[ASTNode]): Seq[Option[ASTNode]] = {
- var remainingNodes = nodeList
- val clauses = clauseNames.map { clauseName =>
- val (matches, nonMatches) = remainingNodes.partition(_.text.toUpperCase == clauseName)
- remainingNodes = nonMatches ++ (if (matches.nonEmpty) matches.tail else Nil)
- matches.headOption
- }
-
- if (remainingNodes.nonEmpty) {
- sys.error(
- s"""Unhandled clauses: ${remainingNodes.map(_.treeString).mkString("\n")}.
- |You are likely trying to use an unsupported Hive feature."""".stripMargin)
- }
- clauses
- }
-
- protected def getClause(clauseName: String, nodeList: Seq[ASTNode]): ASTNode =
- getClauseOption(clauseName, nodeList).getOrElse(sys.error(
- s"Expected clause $clauseName missing from ${nodeList.map(_.treeString).mkString("\n")}"))
-
- protected def getClauseOption(clauseName: String, nodeList: Seq[ASTNode]): Option[ASTNode] = {
- nodeList.filter { case ast: ASTNode => ast.text == clauseName } match {
- case Seq(oneMatch) => Some(oneMatch)
- case Seq() => None
- case _ => sys.error(s"Found multiple instances of clause $clauseName")
- }
- }
-
- protected def nodeToAttribute(node: ASTNode): Attribute = node match {
- case Token("TOK_TABCOL", Token(colName, Nil) :: dataType :: Nil) =>
- AttributeReference(colName, nodeToDataType(dataType), nullable = true)()
- case _ =>
- noParseRule("Attribute", node)
- }
-
- protected def nodeToDataType(node: ASTNode): DataType = node match {
- case Token("TOK_DECIMAL", precision :: scale :: Nil) =>
- DecimalType(precision.text.toInt, scale.text.toInt)
- case Token("TOK_DECIMAL", precision :: Nil) =>
- DecimalType(precision.text.toInt, 0)
- case Token("TOK_DECIMAL", Nil) => DecimalType.USER_DEFAULT
- case Token("TOK_BIGINT", Nil) => LongType
- case Token("TOK_INT", Nil) => IntegerType
- case Token("TOK_TINYINT", Nil) => ByteType
- case Token("TOK_SMALLINT", Nil) => ShortType
- case Token("TOK_BOOLEAN", Nil) => BooleanType
- case Token("TOK_STRING", Nil) => StringType
- case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
- case Token("TOK_CHAR", Token(_, Nil) :: Nil) => StringType
- case Token("TOK_FLOAT", Nil) => FloatType
- case Token("TOK_DOUBLE", Nil) => DoubleType
- case Token("TOK_DATE", Nil) => DateType
- case Token("TOK_TIMESTAMP", Nil) => TimestampType
- case Token("TOK_BINARY", Nil) => BinaryType
- case Token("TOK_LIST", elementType :: Nil) => ArrayType(nodeToDataType(elementType))
- case Token("TOK_STRUCT", Token("TOK_TABCOLLIST", fields) :: Nil) =>
- StructType(fields.map(nodeToStructField))
- case Token("TOK_MAP", keyType :: valueType :: Nil) =>
- MapType(nodeToDataType(keyType), nodeToDataType(valueType))
- case _ =>
- noParseRule("DataType", node)
- }
-
- protected def nodeToStructField(node: ASTNode): StructField = node match {
- case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) =>
- StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true)
- case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: comment :: Nil) =>
- val meta = new MetadataBuilder().putString("comment", unquoteString(comment.text)).build()
- StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true, meta)
- case _ =>
- noParseRule("StructField", node)
- }
-
- protected def extractTableIdent(tableNameParts: ASTNode): TableIdentifier = {
- tableNameParts.children.map {
- case Token(part, Nil) => cleanIdentifier(part)
- } match {
- case Seq(tableOnly) => TableIdentifier(tableOnly)
- case Seq(databaseName, table) => TableIdentifier(table, Some(databaseName))
- case other => sys.error("Hive only supports table names like 'tableName' " +
- s"or 'databaseName.tableName', found '$other'")
- }
- }
-
- /**
- * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2))
- * is equivalent to
- * SELECT MAX(value) FROM src GROUP BY k1, k2 UNION SELECT MAX(value) FROM src GROUP BY k2
- * Check the following link for details.
- *
-https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C+Grouping+and+Rollup
- *
- * The bitmask denotes the validity of each grouping expression in a grouping set; the
- * bitmask is also called the grouping id (`GROUPING__ID`, the virtual column in Hive).
- * E.g. in the superset (k1, k2, k3) (bit 2: k1, bit 1: k2, and bit 0: k3), the grouping ids
- * of GROUPING SETS (k1, k2) and (k2) should be 1 and 5 respectively.
- */
- protected def extractGroupingSet(children: Seq[ASTNode]): (Seq[Expression], Seq[Int]) = {
- val (keyASTs, setASTs) = children.partition {
- case Token("TOK_GROUPING_SETS_EXPRESSION", _) => false // grouping sets
- case _ => true // grouping keys
- }
-
- val keys = keyASTs.map(nodeToExpr)
- val keyMap = keyASTs.zipWithIndex.toMap
-
- val mask = (1 << keys.length) - 1
- val bitmasks: Seq[Int] = setASTs.map {
- case Token("TOK_GROUPING_SETS_EXPRESSION", columns) =>
- columns.foldLeft(mask)((bitmap, col) => {
- val keyIndex = keyMap.find(_._1.treeEquals(col)).map(_._2).getOrElse(
- throw new AnalysisException(s"${col.treeString} doesn't show up in the GROUP BY list"))
- // 0 means that the column at the given index is a grouping column, 1 means it is not,
- // so we unset the bit in bitmap.
- bitmap & ~(1 << (keys.length - 1 - keyIndex))
- })
- case _ => sys.error("Expect GROUPING SETS clause")
- }
-
- (keys, bitmasks)
- }
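-  // Worked example (illustrative, not in the original source): with keys (k1, k2, k3),
-  // mask = (1 << 3) - 1 = 0b111 = 7. For GROUPING SETS (k1, k2) we unset bit 2 (k1) and
-  // bit 1 (k2), giving 0b001 = 1; for (k2) we unset bit 1, giving 0b101 = 5.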
-
- protected def nodeToPlan(node: ASTNode): LogicalPlan = node match {
- case Token("TOK_SHOWFUNCTIONS", args) =>
- // Skip LIKE.
- val pattern = args match {
- case like :: nodes if like.text.toUpperCase == "LIKE" => nodes
- case nodes => nodes
- }
-
- // Extract Database and Function name
- pattern match {
- case Nil =>
- ShowFunctions(None, None)
- case Token(name, Nil) :: Nil =>
- ShowFunctions(None, Some(unquoteString(cleanIdentifier(name))))
- case Token(db, Nil) :: Token(name, Nil) :: Nil =>
- ShowFunctions(Some(unquoteString(cleanIdentifier(db))),
- Some(unquoteString(cleanIdentifier(name))))
- case _ =>
- noParseRule("SHOW FUNCTIONS", node)
- }
-
- case Token("TOK_DESCFUNCTION", Token(functionName, Nil) :: isExtended) =>
- DescribeFunction(cleanIdentifier(functionName), isExtended.nonEmpty)
-
- case Token("TOK_QUERY", queryArgs @ Token("TOK_CTE" | "TOK_FROM" | "TOK_INSERT", _) :: _) =>
- val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
- queryArgs match {
- case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: inserts =>
- val cteRelations = ctes.map { node =>
- val relation = nodeToRelation(node).asInstanceOf[SubqueryAlias]
- relation.alias -> relation
- }
- (Some(from.head), inserts, Some(cteRelations.toMap))
- case Token("TOK_FROM", from) :: inserts =>
- (Some(from.head), inserts, None)
- case Token("TOK_INSERT", _) :: Nil =>
- (None, queryArgs, None)
- }
-
- // Return one query for each insert clause.
- val queries = insertClauses.map {
- case Token("TOK_INSERT", singleInsert) =>
- val (
- intoClause ::
- destClause ::
- selectClause ::
- selectDistinctClause ::
- whereClause ::
- groupByClause ::
- rollupGroupByClause ::
- cubeGroupByClause ::
- groupingSetsClause ::
- orderByClause ::
- havingClause ::
- sortByClause ::
- clusterByClause ::
- distributeByClause ::
- limitClause ::
- lateralViewClause ::
- windowClause :: Nil) = {
- getClauses(
- Seq(
- "TOK_INSERT_INTO",
- "TOK_DESTINATION",
- "TOK_SELECT",
- "TOK_SELECTDI",
- "TOK_WHERE",
- "TOK_GROUPBY",
- "TOK_ROLLUP_GROUPBY",
- "TOK_CUBE_GROUPBY",
- "TOK_GROUPING_SETS",
- "TOK_ORDERBY",
- "TOK_HAVING",
- "TOK_SORTBY",
- "TOK_CLUSTERBY",
- "TOK_DISTRIBUTEBY",
- "TOK_LIMIT",
- "TOK_LATERAL_VIEW",
- "WINDOW"),
- singleInsert)
- }
-
- val relations = fromClause match {
- case Some(f) => nodeToRelation(f)
- case None => OneRowRelation
- }
-
- val withWhere = whereClause.map { whereNode =>
- val Seq(whereExpr) = whereNode.children
- Filter(nodeToExpr(whereExpr), relations)
- }.getOrElse(relations)
-
- val select = (selectClause orElse selectDistinctClause)
- .getOrElse(sys.error("No select clause."))
-
- val transformation = nodeToTransformation(select.children.head, withWhere)
-
- val withLateralView = lateralViewClause.map { lv =>
- nodeToGenerate(lv.children.head, outer = false, withWhere)
- }.getOrElse(withWhere)
-
- // The projection of the query can either be a normal projection, an aggregation
- // (if there is a group by) or a script transformation.
- val withProject: LogicalPlan = transformation.getOrElse {
- val selectExpressions =
- select.children.flatMap(selExprNodeToExpr).map(UnresolvedAlias(_))
- Seq(
- groupByClause.map(e => e match {
- case Token("TOK_GROUPBY", children) =>
- // Not a transformation so must be either project or aggregation.
- Aggregate(children.map(nodeToExpr), selectExpressions, withLateralView)
- case _ => sys.error("Expect GROUP BY")
- }),
- groupingSetsClause.map(e => e match {
- case Token("TOK_GROUPING_SETS", children) =>
- val(groupByExprs, masks) = extractGroupingSet(children)
- GroupingSets(masks, groupByExprs, withLateralView, selectExpressions)
- case _ => sys.error("Expect GROUPING SETS")
- }),
- rollupGroupByClause.map(e => e match {
- case Token("TOK_ROLLUP_GROUPBY", children) =>
- Aggregate(
- Seq(Rollup(children.map(nodeToExpr))),
- selectExpressions,
- withLateralView)
- case _ => sys.error("Expect WITH ROLLUP")
- }),
- cubeGroupByClause.map(e => e match {
- case Token("TOK_CUBE_GROUPBY", children) =>
- Aggregate(
- Seq(Cube(children.map(nodeToExpr))),
- selectExpressions,
- withLateralView)
- case _ => sys.error("Expect WITH CUBE")
- }),
- Some(Project(selectExpressions, withLateralView))).flatten.head
- }
-
- // Handle HAVING clause.
- val withHaving = havingClause.map { h =>
- val havingExpr = h.children match { case Seq(hexpr) => nodeToExpr(hexpr) }
- // Note that we added a cast to boolean. If the expression itself is already boolean,
- // the optimizer will get rid of the unnecessary cast.
- Filter(Cast(havingExpr, BooleanType), withProject)
- }.getOrElse(withProject)
-
- // Handle SELECT DISTINCT
- val withDistinct =
- if (selectDistinctClause.isDefined) Distinct(withHaving) else withHaving
-
- // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause.
- val withSort =
- (orderByClause, sortByClause, distributeByClause, clusterByClause) match {
- case (Some(totalOrdering), None, None, None) =>
- Sort(totalOrdering.children.map(nodeToSortOrder), global = true, withDistinct)
- case (None, Some(perPartitionOrdering), None, None) =>
- Sort(
- perPartitionOrdering.children.map(nodeToSortOrder),
- global = false, withDistinct)
- case (None, None, Some(partitionExprs), None) =>
- RepartitionByExpression(
- partitionExprs.children.map(nodeToExpr), withDistinct)
- case (None, Some(perPartitionOrdering), Some(partitionExprs), None) =>
- Sort(
- perPartitionOrdering.children.map(nodeToSortOrder), global = false,
- RepartitionByExpression(
- partitionExprs.children.map(nodeToExpr),
- withDistinct))
- case (None, None, None, Some(clusterExprs)) =>
- Sort(
- clusterExprs.children.map(nodeToExpr).map(SortOrder(_, Ascending)),
- global = false,
- RepartitionByExpression(
- clusterExprs.children.map(nodeToExpr),
- withDistinct))
- case (None, None, None, None) => withDistinct
- case _ => sys.error("Unsupported set of ordering / distribution clauses.")
- }
-
- val withLimit =
- limitClause.map(l => nodeToExpr(l.children.head))
- .map(Limit(_, withSort))
- .getOrElse(withSort)
-
- // Collect all window specifications defined in the WINDOW clause.
- val windowDefinitions = windowClause.map(_.children.collect {
- case Token("TOK_WINDOWDEF",
- Token(windowName, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) =>
- windowName -> nodesToWindowSpecification(spec)
- }.toMap)
- // Handle cases like
- // window w1 as (partition by p_mfgr order by p_name
- // range between 2 preceding and 2 following),
- // w2 as w1
- val resolvedCrossReference = windowDefinitions.map {
- windowDefMap => windowDefMap.map {
- case (windowName, WindowSpecReference(other)) =>
- (windowName, windowDefMap(other).asInstanceOf[WindowSpecDefinition])
- case o => o.asInstanceOf[(String, WindowSpecDefinition)]
- }
- }
-
- val withWindowDefinitions =
- resolvedCrossReference.map(WithWindowDefinition(_, withLimit)).getOrElse(withLimit)
-
- // TOK_INSERT_INTO means to add files to the table.
- // TOK_DESTINATION means to overwrite the table.
- val resultDestination =
- (intoClause orElse destClause).getOrElse(sys.error("No destination found."))
- val overwrite = intoClause.isEmpty
- nodeToDest(
- resultDestination,
- withWindowDefinitions,
- overwrite)
- }
-
-      // If there are multiple INSERTs, just UNION them together into one query.
- val query = if (queries.length == 1) queries.head else Union(queries)
-
-      // Return a With plan if there are CTE relations.
- cteRelations.map(With(query, _)).getOrElse(query)
-
- case Token("TOK_UNIONALL", left :: right :: Nil) =>
- Union(nodeToPlan(left), nodeToPlan(right))
- case Token("TOK_UNIONDISTINCT", left :: right :: Nil) =>
- Distinct(Union(nodeToPlan(left), nodeToPlan(right)))
- case Token("TOK_EXCEPT", left :: right :: Nil) =>
- Except(nodeToPlan(left), nodeToPlan(right))
- case Token("TOK_INTERSECT", left :: right :: Nil) =>
- Intersect(nodeToPlan(left), nodeToPlan(right))
-
- case _ =>
- noParseRule("Plan", node)
- }
-
- val allJoinTokens = "(TOK_.*JOIN)".r
-  val lateralViewToken = "TOK_LATERAL_VIEW(.*)".r
- protected def nodeToRelation(node: ASTNode): LogicalPlan = {
- node match {
- case Token("TOK_SUBQUERY", query :: Token(alias, Nil) :: Nil) =>
- SubqueryAlias(cleanIdentifier(alias), nodeToPlan(query))
-
-      case Token(lateralViewToken(isOuter), selectClause :: relationClause :: Nil) =>
- nodeToGenerate(
- selectClause,
- outer = isOuter.nonEmpty,
- nodeToRelation(relationClause))
-
- /* All relations, possibly with aliases or sampling clauses. */
- case Token("TOK_TABREF", clauses) =>
-        // If the last clause is not a TOK_* token, it is the table alias.
- val (nonAliasClauses, aliasClause) =
- if (clauses.last.text.startsWith("TOK")) {
- (clauses, None)
- } else {
- (clauses.dropRight(1), Some(clauses.last))
- }
-
- val (Some(tableNameParts) ::
- splitSampleClause ::
- bucketSampleClause :: Nil) = {
- getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE", "TOK_TABLEBUCKETSAMPLE"),
- nonAliasClauses)
- }
-
- val tableIdent = extractTableIdent(tableNameParts)
- val alias = aliasClause.map { case Token(a, Nil) => cleanIdentifier(a) }
- val relation = UnresolvedRelation(tableIdent, alias)
-
- // Apply sampling if requested.
- (bucketSampleClause orElse splitSampleClause).map {
- case Token("TOK_TABLESPLITSAMPLE",
- Token("TOK_ROWCOUNT", Nil) :: Token(count, Nil) :: Nil) =>
- Limit(Literal(count.toInt), relation)
- case Token("TOK_TABLESPLITSAMPLE",
- Token("TOK_PERCENT", Nil) :: Token(fraction, Nil) :: Nil) =>
- // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
- // function takes X PERCENT as the input and the range of X is [0, 100], we need to
- // adjust the fraction.
- require(
- fraction.toDouble >= (0.0 - RandomSampler.roundingEpsilon)
- && fraction.toDouble <= (100.0 + RandomSampler.roundingEpsilon),
- s"Sampling fraction ($fraction) must be on interval [0, 100]")
- Sample(0.0, fraction.toDouble / 100, withReplacement = false,
- (math.random * 1000).toInt,
- relation)(
- isTableSample = true)
- case Token("TOK_TABLEBUCKETSAMPLE",
- Token(numerator, Nil) ::
- Token(denominator, Nil) :: Nil) =>
- val fraction = numerator.toDouble / denominator.toDouble
- Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, relation)(
- isTableSample = true)
- case a =>
- noParseRule("Sampling", a)
- }.getOrElse(relation)
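-        // Illustrative examples (not in the original source): "TABLESAMPLE(30 PERCENT)"
-        // yields Sample(0.0, 0.3, ...) over the relation, and "TABLESAMPLE(BUCKET 1 OUT OF 4)"
-        // yields a sampling fraction of 1.0 / 4 = 0.25.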
-
- case Token(allJoinTokens(joinToken), relation1 :: relation2 :: other) =>
-        if (other.size > 1) {
-          sys.error(s"Unsupported join operation: $other")
-        }
-
- val joinType = joinToken match {
- case "TOK_JOIN" => Inner
- case "TOK_CROSSJOIN" => Inner
- case "TOK_RIGHTOUTERJOIN" => RightOuter
- case "TOK_LEFTOUTERJOIN" => LeftOuter
- case "TOK_FULLOUTERJOIN" => FullOuter
- case "TOK_LEFTSEMIJOIN" => LeftSemi
- case "TOK_UNIQUEJOIN" => noParseRule("Unique Join", node)
- case "TOK_ANTIJOIN" => noParseRule("Anti Join", node)
- case "TOK_NATURALJOIN" => NaturalJoin(Inner)
- case "TOK_NATURALRIGHTOUTERJOIN" => NaturalJoin(RightOuter)
- case "TOK_NATURALLEFTOUTERJOIN" => NaturalJoin(LeftOuter)
- case "TOK_NATURALFULLOUTERJOIN" => NaturalJoin(FullOuter)
- }
- Join(nodeToRelation(relation1),
- nodeToRelation(relation2),
- joinType,
- other.headOption.map(nodeToExpr))
-
- case _ =>
- noParseRule("Relation", node)
- }
- }
-
- protected def nodeToSortOrder(node: ASTNode): SortOrder = node match {
- case Token("TOK_TABSORTCOLNAMEASC", sortExpr :: Nil) =>
- SortOrder(nodeToExpr(sortExpr), Ascending)
- case Token("TOK_TABSORTCOLNAMEDESC", sortExpr :: Nil) =>
- SortOrder(nodeToExpr(sortExpr), Descending)
- case _ =>
- noParseRule("SortOrder", node)
- }
-
- val destinationToken = "TOK_DESTINATION|TOK_INSERT_INTO".r
- protected def nodeToDest(
- node: ASTNode,
- query: LogicalPlan,
- overwrite: Boolean): LogicalPlan = node match {
- case Token(destinationToken(),
- Token("TOK_DIR",
- Token("TOK_TMP_FILE", Nil) :: Nil) :: Nil) =>
- query
-
- case Token(destinationToken(),
- Token("TOK_TAB",
- tableArgs) :: Nil) =>
- val Some(tableNameParts) :: partitionClause :: Nil =
- getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
-
- val tableIdent = extractTableIdent(tableNameParts)
-
- val partitionKeys = partitionClause.map(_.children.map {
- // Parse partitions. We also make keys case insensitive.
- case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
- cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
- case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
- cleanIdentifier(key.toLowerCase) -> None
- }.toMap).getOrElse(Map.empty)
-
- InsertIntoTable(
- UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = false)
-
- case Token(destinationToken(),
- Token("TOK_TAB",
- tableArgs) ::
- Token("TOK_IFNOTEXISTS",
- ifNotExists) :: Nil) =>
- val Some(tableNameParts) :: partitionClause :: Nil =
- getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
-
- val tableIdent = extractTableIdent(tableNameParts)
-
- val partitionKeys = partitionClause.map(_.children.map {
- // Parse partitions. We also make keys case insensitive.
- case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
- cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
- case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
- cleanIdentifier(key.toLowerCase) -> None
- }.toMap).getOrElse(Map.empty)
-
- InsertIntoTable(
- UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = true)
-
- case _ =>
- noParseRule("Destination", node)
- }
-
- protected def selExprNodeToExpr(node: ASTNode): Option[Expression] = node match {
- case Token("TOK_SELEXPR", e :: Nil) =>
- Some(nodeToExpr(e))
-
- case Token("TOK_SELEXPR", e :: Token(alias, Nil) :: Nil) =>
- Some(Alias(nodeToExpr(e), cleanIdentifier(alias))())
-
- case Token("TOK_SELEXPR", e :: aliasChildren) =>
- val aliasNames = aliasChildren.collect {
- case Token(name, Nil) => cleanIdentifier(name)
- }
- Some(MultiAlias(nodeToExpr(e), aliasNames))
-
- /* Hints are ignored */
- case Token("TOK_HINTLIST", _) => None
-
- case _ =>
- noParseRule("Select", node)
- }
-
- protected val escapedIdentifier = "`(.+)`".r
- protected val doubleQuotedString = "\"([^\"]+)\"".r
- protected val singleQuotedString = "'([^']+)'".r
-
- protected def unquoteString(str: String) = str match {
- case singleQuotedString(s) => s
- case doubleQuotedString(s) => s
- case other => other
- }
-
- /** Strips backticks from ident if present */
- protected def cleanIdentifier(ident: String): String = ident match {
- case escapedIdentifier(i) => i
- case plainIdent => plainIdent
- }
-
- /* Case insensitive matches */
- val COUNT = "(?i)COUNT".r
- val SUM = "(?i)SUM".r
- val AND = "(?i)AND".r
- val OR = "(?i)OR".r
- val NOT = "(?i)NOT".r
- val TRUE = "(?i)TRUE".r
- val FALSE = "(?i)FALSE".r
- val LIKE = "(?i)LIKE".r
- val RLIKE = "(?i)RLIKE".r
- val REGEXP = "(?i)REGEXP".r
- val IN = "(?i)IN".r
- val DIV = "(?i)DIV".r
- val BETWEEN = "(?i)BETWEEN".r
- val WHEN = "(?i)WHEN".r
- val CASE = "(?i)CASE".r
-
- val INTEGRAL = "[+-]?\\d+".r
- val DECIMAL = "[+-]?((\\d+(\\.\\d*)?)|(\\.\\d+))".r
-
- protected def nodeToExpr(node: ASTNode): Expression = node match {
- /* Attribute References */
- case Token("TOK_TABLE_OR_COL", Token(name, Nil) :: Nil) =>
- UnresolvedAttribute.quoted(cleanIdentifier(name))
- case Token(".", qualifier :: Token(attr, Nil) :: Nil) =>
- nodeToExpr(qualifier) match {
- case UnresolvedAttribute(nameParts) =>
- UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
- case other => UnresolvedExtractValue(other, Literal(cleanIdentifier(attr)))
- }
- case Token("TOK_SUBQUERY_EXPR", Token("TOK_SUBQUERY_OP", Nil) :: subquery :: Nil) =>
- ScalarSubquery(nodeToPlan(subquery))
-
- /* Stars (*) */
- case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
-    // The format dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
-    // have a single child, which is tableName.
- case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", target) :: Nil) if target.nonEmpty =>
- UnresolvedStar(Some(target.map(x => cleanIdentifier(x.text))))
-
- /* Aggregate Functions */
- case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
- Count(args.map(nodeToExpr)).toAggregateExpression(isDistinct = true)
- case Token("TOK_FUNCTIONSTAR", Token(COUNT(), Nil) :: Nil) =>
- Count(Literal(1)).toAggregateExpression()
-
- /* Casts */
- case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), StringType)
- case Token("TOK_FUNCTION", Token("TOK_VARCHAR", _) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), StringType)
- case Token("TOK_FUNCTION", Token("TOK_CHAR", _) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), StringType)
- case Token("TOK_FUNCTION", Token("TOK_INT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), IntegerType)
- case Token("TOK_FUNCTION", Token("TOK_BIGINT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), LongType)
- case Token("TOK_FUNCTION", Token("TOK_FLOAT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), FloatType)
- case Token("TOK_FUNCTION", Token("TOK_DOUBLE", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DoubleType)
- case Token("TOK_FUNCTION", Token("TOK_SMALLINT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), ShortType)
- case Token("TOK_FUNCTION", Token("TOK_TINYINT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), ByteType)
- case Token("TOK_FUNCTION", Token("TOK_BINARY", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), BinaryType)
- case Token("TOK_FUNCTION", Token("TOK_BOOLEAN", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), BooleanType)
- case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: scale :: nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, scale.text.toInt))
- case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, 0))
- case Token("TOK_FUNCTION", Token("TOK_DECIMAL", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DecimalType.USER_DEFAULT)
- case Token("TOK_FUNCTION", Token("TOK_TIMESTAMP", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), TimestampType)
- case Token("TOK_FUNCTION", Token("TOK_DATE", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DateType)
-
- /* Arithmetic */
- case Token("+", child :: Nil) => nodeToExpr(child)
- case Token("-", child :: Nil) => UnaryMinus(nodeToExpr(child))
- case Token("~", child :: Nil) => BitwiseNot(nodeToExpr(child))
- case Token("+", left :: right:: Nil) => Add(nodeToExpr(left), nodeToExpr(right))
- case Token("-", left :: right:: Nil) => Subtract(nodeToExpr(left), nodeToExpr(right))
- case Token("*", left :: right:: Nil) => Multiply(nodeToExpr(left), nodeToExpr(right))
- case Token("/", left :: right:: Nil) => Divide(nodeToExpr(left), nodeToExpr(right))
- case Token(DIV(), left :: right:: Nil) =>
- Cast(Divide(nodeToExpr(left), nodeToExpr(right)), LongType)
- case Token("%", left :: right:: Nil) => Remainder(nodeToExpr(left), nodeToExpr(right))
- case Token("&", left :: right:: Nil) => BitwiseAnd(nodeToExpr(left), nodeToExpr(right))
- case Token("|", left :: right:: Nil) => BitwiseOr(nodeToExpr(left), nodeToExpr(right))
- case Token("^", left :: right:: Nil) => BitwiseXor(nodeToExpr(left), nodeToExpr(right))
-
- /* Comparisons */
- case Token("=", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
- case Token("==", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
- case Token("<=>", left :: right:: Nil) => EqualNullSafe(nodeToExpr(left), nodeToExpr(right))
- case Token("!=", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
- case Token("<>", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
- case Token(">", left :: right:: Nil) => GreaterThan(nodeToExpr(left), nodeToExpr(right))
- case Token(">=", left :: right:: Nil) => GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right))
- case Token("<", left :: right:: Nil) => LessThan(nodeToExpr(left), nodeToExpr(right))
- case Token("<=", left :: right:: Nil) => LessThanOrEqual(nodeToExpr(left), nodeToExpr(right))
- case Token(LIKE(), left :: right:: Nil) => Like(nodeToExpr(left), nodeToExpr(right))
- case Token(RLIKE(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
- case Token(REGEXP(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
- case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: Nil) =>
- IsNotNull(nodeToExpr(child))
- case Token("TOK_FUNCTION", Token("TOK_ISNULL", Nil) :: child :: Nil) =>
- IsNull(nodeToExpr(child))
- case Token("TOK_FUNCTION", Token(IN(), Nil) :: value :: list) =>
- In(nodeToExpr(value), list.map(nodeToExpr))
- case Token("TOK_FUNCTION",
- Token(BETWEEN(), Nil) ::
- kw ::
- target ::
- minValue ::
- maxValue :: Nil) =>
-
- val targetExpression = nodeToExpr(target)
- val betweenExpr =
- And(
- GreaterThanOrEqual(targetExpression, nodeToExpr(minValue)),
- LessThanOrEqual(targetExpression, nodeToExpr(maxValue)))
- kw match {
- case Token("KW_FALSE", Nil) => betweenExpr
- case Token("KW_TRUE", Nil) => Not(betweenExpr)
- }
-
- /* Boolean Logic */
- case Token(AND(), left :: right:: Nil) => And(nodeToExpr(left), nodeToExpr(right))
- case Token(OR(), left :: right:: Nil) => Or(nodeToExpr(left), nodeToExpr(right))
- case Token(NOT(), child :: Nil) => Not(nodeToExpr(child))
- case Token("!", child :: Nil) => Not(nodeToExpr(child))
-
- /* Case statements */
- case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) =>
- CaseWhen.createFromParser(branches.map(nodeToExpr))
- case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) =>
- val keyExpr = nodeToExpr(branches.head)
- CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr))
-
- /* Complex datatype manipulation */
- case Token("[", child :: ordinal :: Nil) =>
- UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal))
-
- /* Window Functions */
- case Token(text, args :+ Token("TOK_WINDOWSPEC", spec)) =>
- val function = nodeToExpr(node.copy(children = node.children.init))
- nodesToWindowSpecification(spec) match {
- case reference: WindowSpecReference =>
- UnresolvedWindowExpression(function, reference)
- case definition: WindowSpecDefinition =>
- WindowExpression(function, definition)
- }
-
- /* UDFs - Must be last otherwise will preempt built in functions */
- case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>
- UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = false)
- // Aggregate function with DISTINCT keyword.
- case Token("TOK_FUNCTIONDI", Token(name, Nil) :: args) =>
- UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = true)
- case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: args) =>
- UnresolvedFunction(name, UnresolvedStar(None) :: Nil, isDistinct = false)
-
- /* Literals */
- case Token("TOK_NULL", Nil) => Literal.create(null, NullType)
- case Token(TRUE(), Nil) => Literal.create(true, BooleanType)
- case Token(FALSE(), Nil) => Literal.create(false, BooleanType)
- case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
- Literal(strings.map(s => ParseUtils.unescapeSQLString(s.text)).mkString)
-
- case ast if ast.tokenType == SparkSqlParser.TinyintLiteral =>
- Literal.create(ast.text.substring(0, ast.text.length() - 1).toByte, ByteType)
-
- case ast if ast.tokenType == SparkSqlParser.SmallintLiteral =>
- Literal.create(ast.text.substring(0, ast.text.length() - 1).toShort, ShortType)
-
- case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
- Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)
-
- case ast if ast.tokenType == SparkSqlParser.DoubleLiteral =>
- Literal(ast.text.toDouble)
-
- case ast if ast.tokenType == SparkSqlParser.Number =>
- val text = ast.text
- text match {
- case INTEGRAL() =>
- BigDecimal(text) match {
- case v if v.isValidInt =>
- Literal(v.intValue())
- case v if v.isValidLong =>
- Literal(v.longValue())
- case v => Literal(v.underlying())
- }
- case DECIMAL(_*) =>
- Literal(BigDecimal(text).underlying())
- case _ =>
-          // Convert a decimal in scientific notation into a double.
- Literal(text.toDouble)
- }
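-    // Illustrative examples (not in the original source): "123" becomes an Int literal,
-    // "12345678901" a Long, "1.5" a BigDecimal-backed decimal, and "1e3" a Double (1000.0).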
- case ast if ast.tokenType == SparkSqlParser.StringLiteral =>
- Literal(ParseUtils.unescapeSQLString(ast.text))
-
- case ast if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
- Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
-
- case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
- Literal(CalendarInterval.fromYearMonthString(ast.children.head.text))
-
- case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
- Literal(CalendarInterval.fromDayTimeString(ast.children.head.text))
-
- case Token("TOK_INTERVAL", elements) =>
- var interval = new CalendarInterval(0, 0)
- var updated = false
- elements.foreach {
- // The interval node will always contain children for all possible time units. A child node
- // is only useful when it contains exactly one (numeric) child.
- case e @ Token(name, Token(value, Nil) :: Nil) =>
- val unit = name match {
- case "TOK_INTERVAL_YEAR_LITERAL" => "year"
- case "TOK_INTERVAL_MONTH_LITERAL" => "month"
- case "TOK_INTERVAL_WEEK_LITERAL" => "week"
- case "TOK_INTERVAL_DAY_LITERAL" => "day"
- case "TOK_INTERVAL_HOUR_LITERAL" => "hour"
- case "TOK_INTERVAL_MINUTE_LITERAL" => "minute"
- case "TOK_INTERVAL_SECOND_LITERAL" => "second"
- case "TOK_INTERVAL_MILLISECOND_LITERAL" => "millisecond"
- case "TOK_INTERVAL_MICROSECOND_LITERAL" => "microsecond"
- case _ => noParseRule(s"Interval($name)", e)
- }
- interval = interval.add(CalendarInterval.fromSingleUnitString(unit, value))
- updated = true
- case _ =>
- }
- if (!updated) {
- throw new AnalysisException("at least one time unit should be given for interval literal")
- }
- Literal(interval)
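-      // Illustrative example (not in the original source): "INTERVAL 1 DAY 2 HOUR" arrives
-      // as a TOK_INTERVAL whose day and hour children each carry one numeric child; the
-      // pieces are summed into a single CalendarInterval literal.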
-
- case _ =>
- noParseRule("Expression", node)
- }
-
- /* Case insensitive matches for Window Specification */
- val PRECEDING = "(?i)preceding".r
- val FOLLOWING = "(?i)following".r
- val CURRENT = "(?i)current".r
- protected def nodesToWindowSpecification(nodes: Seq[ASTNode]): WindowSpec = nodes match {
- case Token(windowName, Nil) :: Nil =>
- // Refer to a window spec defined in the window clause.
- WindowSpecReference(windowName)
- case Nil =>
- // OVER()
- WindowSpecDefinition(
- partitionSpec = Nil,
- orderSpec = Nil,
- frameSpecification = UnspecifiedFrame)
- case spec =>
- val (partitionClause :: rowFrame :: rangeFrame :: Nil) =
- getClauses(
- Seq(
- "TOK_PARTITIONINGSPEC",
- "TOK_WINDOWRANGE",
- "TOK_WINDOWVALUES"),
- spec)
-
- // Handle Partition By and Order By.
- val (partitionSpec, orderSpec) = partitionClause.map { partitionAndOrdering =>
- val (partitionByClause :: orderByClause :: sortByClause :: clusterByClause :: Nil) =
- getClauses(
- Seq("TOK_DISTRIBUTEBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_CLUSTERBY"),
- partitionAndOrdering.children)
-
- (partitionByClause, orderByClause.orElse(sortByClause), clusterByClause) match {
- case (Some(partitionByExpr), Some(orderByExpr), None) =>
- (partitionByExpr.children.map(nodeToExpr),
- orderByExpr.children.map(nodeToSortOrder))
- case (Some(partitionByExpr), None, None) =>
- (partitionByExpr.children.map(nodeToExpr), Nil)
- case (None, Some(orderByExpr), None) =>
- (Nil, orderByExpr.children.map(nodeToSortOrder))
- case (None, None, Some(clusterByExpr)) =>
- val expressions = clusterByExpr.children.map(nodeToExpr)
- (expressions, expressions.map(SortOrder(_, Ascending)))
- case _ =>
- noParseRule("Partition & Ordering", partitionAndOrdering)
- }
- }.getOrElse {
- (Nil, Nil)
- }
-
- // Handle Window Frame
- val windowFrame =
- if (rowFrame.isEmpty && rangeFrame.isEmpty) {
- UnspecifiedFrame
- } else {
- val frameType = rowFrame.map(_ => RowFrame).getOrElse(RangeFrame)
- def nodeToBoundary(node: ASTNode): FrameBoundary = node match {
- case Token(PRECEDING(), Token(count, Nil) :: Nil) =>
- if (count.toLowerCase() == "unbounded") {
- UnboundedPreceding
- } else {
- ValuePreceding(count.toInt)
- }
- case Token(FOLLOWING(), Token(count, Nil) :: Nil) =>
- if (count.toLowerCase() == "unbounded") {
- UnboundedFollowing
- } else {
- ValueFollowing(count.toInt)
- }
- case Token(CURRENT(), Nil) => CurrentRow
- case _ =>
- noParseRule("Window Frame Boundary", node)
- }
-
- rowFrame.orElse(rangeFrame).map { frame =>
- frame.children match {
- case precedingNode :: followingNode :: Nil =>
- SpecifiedWindowFrame(
- frameType,
- nodeToBoundary(precedingNode),
- nodeToBoundary(followingNode))
- case precedingNode :: Nil =>
- SpecifiedWindowFrame(frameType, nodeToBoundary(precedingNode), CurrentRow)
- case _ =>
- noParseRule("Window Frame", frame)
- }
-        }.getOrElse(sys.error("If you see this, please file a bug report with your query."))
- }
-
- WindowSpecDefinition(partitionSpec, orderSpec, windowFrame)
- }
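-  // Illustrative example (not in the original source): an OVER clause like
-  // "(PARTITION BY a ORDER BY b ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)" resolves to a
-  // WindowSpecDefinition whose frame is SpecifiedWindowFrame(RowFrame, ValuePreceding(1), CurrentRow).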
-
- protected def nodeToTransformation(
- node: ASTNode,
- child: LogicalPlan): Option[ScriptTransformation] = None
-
- val explode = "(?i)explode".r
- val jsonTuple = "(?i)json_tuple".r
- protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: LogicalPlan): Generate = {
- val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node
-
- val alias = cleanIdentifier(getClause("TOK_TABALIAS", clauses).children.head.text)
-
- val generator = clauses.head match {
- case Token("TOK_FUNCTION", Token(explode(), Nil) :: childNode :: Nil) =>
- Explode(nodeToExpr(childNode))
- case Token("TOK_FUNCTION", Token(jsonTuple(), Nil) :: children) =>
- JsonTuple(children.map(nodeToExpr))
- case other =>
- nodeToGenerator(other)
- }
-
- val attributes = clauses.collect {
- case Token(a, Nil) => UnresolvedAttribute(a.toLowerCase)
- }
-
- Generate(generator, join = true, outer = outer, Some(alias.toLowerCase), attributes, child)
- }
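-  // Illustrative example (not in the original source): "LATERAL VIEW explode(cols) t AS col"
-  // becomes Generate(Explode('cols), join = true, outer = false, Some("t"), Seq('col), child).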
-
- protected def nodeToGenerator(node: ASTNode): Generator = noParseRule("Generator", node)
-
- protected def noParseRule(msg: String, node: ASTNode): Nothing = throw new NotImplementedError(
- s"[$msg]: No parse rules for ASTNode type: ${node.tokenType}, tree:\n${node.treeString}")
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserInterface.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserInterface.scala
deleted file mode 100644
index 24ec452..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserInterface.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-
-/**
- * Interface for a parser.
- */
-trait ParserInterface {
- /** Creates LogicalPlan for a given SQL string. */
- def parsePlan(sqlText: String): LogicalPlan
-
- /** Creates Expression for a given SQL string. */
- def parseExpression(sqlText: String): Expression
-
- /** Creates TableIdentifier for a given SQL string. */
- def parseTableIdentifier(sqlText: String): TableIdentifier
-}
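For orientation, a minimal sketch of how this trait is consumed (hypothetical driver
code; `myParser` stands in for any concrete ParserInterface implementation and is not
part of this commit):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.expressions.Expression
    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    // Each method turns a SQL fragment into the corresponding catalyst object.
    val plan: LogicalPlan = myParser.parsePlan("SELECT * FROM db.tbl")
    val expr: Expression = myParser.parseExpression("a + 1")
    val table: TableIdentifier = myParser.parseTableIdentifier("db.tbl")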
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala
new file mode 100644
index 0000000..7b456a6
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import scala.language.implicitConversions
+import scala.util.parsing.combinator.lexical.StdLexical
+import scala.util.parsing.combinator.syntactical.StandardTokenParsers
+import scala.util.parsing.combinator.PackratParsers
+import scala.util.parsing.input.CharArrayReader.EofCh
+
+import org.apache.spark.sql.catalyst.plans.logical._
+
+private[sql] abstract class AbstractSparkSQLParser
+ extends StandardTokenParsers with PackratParsers with ParserInterface {
+
+ def parsePlan(input: String): LogicalPlan = synchronized {
+ // Initialize the Keywords.
+ initLexical
+ phrase(start)(new lexical.Scanner(input)) match {
+ case Success(plan, _) => plan
+ case failureOrError => sys.error(failureOrError.toString)
+ }
+ }
+  /* One-time initialization of the lexical. This avoids reinitializing it on every parse call. */
+ protected lazy val initLexical: Unit = lexical.initialize(reservedWords)
+
+ protected case class Keyword(str: String) {
+ def normalize: String = lexical.normalizeKeyword(str)
+ def parser: Parser[String] = normalize
+ }
+
+ protected implicit def asParser(k: Keyword): Parser[String] = k.parser
+
+  // By default, use reflection to find the reserved words defined in the subclass.
+  // NOTE: since the Keyword properties are defined by the subclass, we cannot call this
+  // method while the parent class is being instantiated, because the subclass instance
+  // has not been created yet.
+ protected lazy val reservedWords: Seq[String] =
+ this
+ .getClass
+ .getMethods
+ .filter(_.getReturnType == classOf[Keyword])
+ .map(_.invoke(this).asInstanceOf[Keyword].normalize)
+
+  // The keyword set starts out empty; it is populated later via initLexical.
+ override val lexical = new SqlLexical
+
+ protected def start: Parser[LogicalPlan]
+
+ // Returns the whole input string
+ protected lazy val wholeInput: Parser[String] = new Parser[String] {
+ def apply(in: Input): ParseResult[String] =
+ Success(in.source.toString, in.drop(in.source.length()))
+ }
+
+  // Returns the rest of the input string that has not been parsed yet
+ protected lazy val restInput: Parser[String] = new Parser[String] {
+ def apply(in: Input): ParseResult[String] =
+ Success(
+ in.source.subSequence(in.offset, in.source.length()).toString,
+ in.drop(in.source.length()))
+ }
+}
+
+class SqlLexical extends StdLexical {
+ case class DecimalLit(chars: String) extends Token {
+ override def toString: String = chars
+ }
+
+  /* This is a workaround to support lazy initialization of the keyword set */
+ def initialize(keywords: Seq[String]): Unit = {
+ reserved.clear()
+ reserved ++= keywords
+ }
+
+  /* Normalize the keyword string */
+ def normalizeKeyword(str: String): String = str.toLowerCase
+
+ delimiters += (
+ "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
+ ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
+ )
+
+ protected override def processIdent(name: String) = {
+ val token = normalizeKeyword(name)
+ if (reserved contains token) Keyword(token) else Identifier(name)
+ }
+
+ override lazy val token: Parser[Token] =
+ ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) }
+ | '.' ~> (rep1(digit) ~ scientificNotation) ^^
+ { case i ~ s => DecimalLit("0." + i.mkString + s) }
+ | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^
+ { case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) }
+ | digit.* ~ identChar ~ (identChar | digit).* ^^
+ { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
+ | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
+ case i ~ None => NumericLit(i.mkString)
+ case i ~ Some(d) => DecimalLit(i.mkString + "." + d.mkString)
+ }
+ | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
+ { case chars => StringLit(chars mkString "") }
+ | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
+ { case chars => StringLit(chars mkString "") }
+ | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
+ { case chars => Identifier(chars mkString "") }
+ | EofCh ^^^ EOF
+ | '\'' ~> failure("unclosed string literal")
+ | '"' ~> failure("unclosed string literal")
+ | delim
+ | failure("illegal character")
+ )
+
+ override def identChar: Parser[Elem] = letter | elem('_')
+
+ private lazy val scientificNotation: Parser[String] =
+ (elem('e') | elem('E')) ~> (elem('+') | elem('-')).? ~ rep1(digit) ^^ {
+ case s ~ rest => "e" + s.mkString + rest.mkString
+ }
+
+ override def whitespace: Parser[Any] =
+ ( whitespaceChar
+ | '/' ~ '*' ~ comment
+ | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
+ | '#' ~ chrExcept(EofCh, '\n').*
+ | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
+ | '/' ~ '*' ~ failure("unclosed comment")
+ ).*
+}
+
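For context, a minimal sketch (hypothetical, not part of this commit) of how a concrete
subclass plugs into AbstractSparkSQLParser: Keyword vals are discovered via reflection
(reservedWords) and registered with the lexer on the first parsePlan call (initLexical),
while `start` supplies the grammar. Select1Parser, its one-rule grammar, and the stub
methods below are invented purely for illustration:

    package org.apache.spark.sql.catalyst.parser

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.expressions.Expression
    import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}

    private[sql] object Select1Parser extends AbstractSparkSQLParser {
      protected val SELECT = Keyword("SELECT")  // picked up by reservedWords via reflection

      // Accept "SELECT <number>" and ignore the literal; OneRowRelation is just a
      // convenient placeholder plan for this sketch.
      protected lazy val start: Parser[LogicalPlan] =
        SELECT ~> numericLit ^^ { _ => OneRowRelation }

      // Stubs: this sketch only supports whole plans.
      def parseExpression(sqlText: String): Expression =
        throw new UnsupportedOperationException("not supported in this sketch")
      def parseTableIdentifier(sqlText: String): TableIdentifier =
        throw new UnsupportedOperationException("not supported in this sketch")
    }

    // Select1Parser.parsePlan("select 1") returns OneRowRelation; matching is
    // case-insensitive because processIdent lowercases identifiers before the
    // reserved-word check.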