Posted to commits@spark.apache.org by an...@apache.org on 2016/03/04 19:32:05 UTC
[1/3] spark git commit: [SPARK-13633][SQL] Move things into catalyst.parser package
Repository: spark
Updated Branches:
refs/heads/master 83302c3bf -> b7d414742
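For orientation: the hunks in this message amount to one package move. Parser-related classes (CatalystQl, ParserInterface, DataTypeParser, LegacyTypeStringParser) leave org.apache.spark.sql.catalyst and org.apache.spark.sql.catalyst.util for the new org.apache.spark.sql.catalyst.parser package, and callers' imports are updated to match. A minimal before/after sketch of the import change, assembled from the import hunks below:

    // Before this commit: parser classes were split across two packages.
    import org.apache.spark.sql.catalyst.{CatalystQl, ParserInterface}
    import org.apache.spark.sql.catalyst.util.{DataTypeParser, LegacyTypeStringParser}

    // After this commit: everything parser-related lives in catalyst.parser.
    import org.apache.spark.sql.catalyst.parser.{CatalystQl, DataTypeParser,
      LegacyTypeStringParser, ParserInterface}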
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DataTypeParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DataTypeParserSuite.scala
deleted file mode 100644
index bebf708..0000000
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DataTypeParserSuite.scala
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.spark.sql.catalyst.util
-
-import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.types._
-
-class DataTypeParserSuite extends SparkFunSuite {
-
- def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
- test(s"parse ${dataTypeString.replace("\n", "")}") {
- assert(DataTypeParser.parse(dataTypeString) === expectedDataType)
- }
- }
-
- def unsupported(dataTypeString: String): Unit = {
- test(s"$dataTypeString is not supported") {
- intercept[DataTypeException](DataTypeParser.parse(dataTypeString))
- }
- }
-
- checkDataType("int", IntegerType)
- checkDataType("integer", IntegerType)
- checkDataType("BooLean", BooleanType)
- checkDataType("tinYint", ByteType)
- checkDataType("smallINT", ShortType)
- checkDataType("INT", IntegerType)
- checkDataType("INTEGER", IntegerType)
- checkDataType("bigint", LongType)
- checkDataType("float", FloatType)
- checkDataType("dOUBle", DoubleType)
- checkDataType("decimal(10, 5)", DecimalType(10, 5))
- checkDataType("decimal", DecimalType.USER_DEFAULT)
- checkDataType("DATE", DateType)
- checkDataType("timestamp", TimestampType)
- checkDataType("string", StringType)
- checkDataType("ChaR(5)", StringType)
- checkDataType("varchAr(20)", StringType)
- checkDataType("cHaR(27)", StringType)
- checkDataType("BINARY", BinaryType)
-
- checkDataType("array<doublE>", ArrayType(DoubleType, true))
- checkDataType("Array<map<int, tinYint>>", ArrayType(MapType(IntegerType, ByteType, true), true))
- checkDataType(
- "array<struct<tinYint:tinyint>>",
- ArrayType(StructType(StructField("tinYint", ByteType, true) :: Nil), true)
- )
- checkDataType("MAP<int, STRING>", MapType(IntegerType, StringType, true))
- checkDataType("MAp<int, ARRAY<double>>", MapType(IntegerType, ArrayType(DoubleType), true))
- checkDataType(
- "MAP<int, struct<varchar:string>>",
- MapType(IntegerType, StructType(StructField("varchar", StringType, true) :: Nil), true)
- )
-
- checkDataType(
- "struct<intType: int, ts:timestamp>",
- StructType(
- StructField("intType", IntegerType, true) ::
- StructField("ts", TimestampType, true) :: Nil)
- )
- // It is fine to use the data type string as the column name.
- checkDataType(
- "Struct<int: int, timestamp:timestamp>",
- StructType(
- StructField("int", IntegerType, true) ::
- StructField("timestamp", TimestampType, true) :: Nil)
- )
- checkDataType(
- """
- |struct<
- | struct:struct<deciMal:DECimal, anotherDecimal:decimAL(5,2)>,
- | MAP:Map<timestamp, varchar(10)>,
- | arrAy:Array<double>,
- | anotherArray:Array<char(9)>>
- """.stripMargin,
- StructType(
- StructField("struct",
- StructType(
- StructField("deciMal", DecimalType.USER_DEFAULT, true) ::
- StructField("anotherDecimal", DecimalType(5, 2), true) :: Nil), true) ::
- StructField("MAP", MapType(TimestampType, StringType), true) ::
- StructField("arrAy", ArrayType(DoubleType, true), true) ::
- StructField("anotherArray", ArrayType(StringType, true), true) :: Nil)
- )
- // A column name can be a reserved word in our DDL parser and SqlParser.
- checkDataType(
- "Struct<TABLE: string, CASE:boolean>",
- StructType(
- StructField("TABLE", StringType, true) ::
- StructField("CASE", BooleanType, true) :: Nil)
- )
- // Use backticks to quote column names having special characters.
- checkDataType(
- "struct<`x+y`:int, `!@#$%^&*()`:string, `1_2.345<>:\"`:varchar(20)>",
- StructType(
- StructField("x+y", IntegerType, true) ::
- StructField("!@#$%^&*()", StringType, true) ::
- StructField("1_2.345<>:\"", StringType, true) :: Nil)
- )
- // Empty struct.
- checkDataType("strUCt<>", StructType(Nil))
-
- unsupported("it is not a data type")
- unsupported("struct<x+y: int, 1.1:timestamp>")
- unsupported("struct<x: int")
- unsupported("struct<x int, y string>")
- unsupported("struct<`x``y` int>")
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 6c7929c..0fa8159 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -24,7 +24,8 @@ import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.util.{usePrettyExpression, DataTypeParser}
+import org.apache.spark.sql.catalyst.parser.DataTypeParser
+import org.apache.spark.sql.catalyst.util.usePrettyExpression
import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.types._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 39dad16..c742bf2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -35,6 +35,7 @@ import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.encoders.encoderFor
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.optimizer.Optimizer
+import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala
index bc690f6..9143258 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala
@@ -17,9 +17,9 @@
package org.apache.spark.sql.execution
import org.apache.spark.sql.{AnalysisException, SaveMode}
-import org.apache.spark.sql.catalyst.{CatalystQl, TableIdentifier}
+import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
-import org.apache.spark.sql.catalyst.parser.{ASTNode, ParserConf, SimpleParserConf}
+import org.apache.spark.sql.catalyst.parser.{ASTNode, CatalystQl, ParserConf, SimpleParserConf}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
index 7ea098c..b8af832 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
@@ -45,7 +45,7 @@ import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.{RDD, SqlNewHadoopPartition, SqlNewHadoopRDD}
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.util.LegacyTypeStringParser
+import org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser
import org.apache.spark.sql.execution.datasources.{PartitionSpec, _}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index b9873d3..86412c3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -22,11 +22,12 @@ import scala.reflect.runtime.universe.{typeTag, TypeTag}
import scala.util.Try
import org.apache.spark.annotation.Experimental
-import org.apache.spark.sql.catalyst.{CatalystQl, ScalaReflection}
+import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.{Star, UnresolvedFunction}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
+import org.apache.spark.sql.catalyst.parser.CatalystQl
import org.apache.spark.sql.catalyst.plans.logical.BroadcastHint
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.types._
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index f93a405..f5f3654 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -18,9 +18,9 @@
package org.apache.spark.sql.internal
import org.apache.spark.sql.{ContinuousQueryManager, SQLContext, UDFRegistration}
-import org.apache.spark.sql.catalyst.ParserInterface
import org.apache.spark.sql.catalyst.analysis.{Analyzer, Catalog, FunctionRegistry, SimpleCatalog}
import org.apache.spark.sql.catalyst.optimizer.Optimizer
+import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution._
import org.apache.spark.sql.execution.datasources.{PreInsertCastAndRename, ResolveDataSource}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index ee8ec2d..a053108 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -36,10 +36,10 @@ import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{Catalog, MultiInstanceRelation, OverrideCatalog}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.parser.DataTypeParser
import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
-import org.apache.spark.sql.catalyst.util.DataTypeParser
import org.apache.spark.sql.execution.{datasources, FileRelation}
import org.apache.spark.sql.execution.datasources.{Partition => ParquetPartition, _}
import org.apache.spark.sql.execution.datasources.parquet.ParquetRelation
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 0d4b79f..8207e78 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -18,8 +18,8 @@
package org.apache.spark.sql.hive
import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.ParserInterface
import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, OverrideCatalog}
+import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.execution.{python, SparkPlanner}
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.internal.{SessionState, SQLConf}
[2/3] spark git commit: [SPARK-13633][SQL] Move things into catalyst.parser package
Posted by an...@apache.org.
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/CatalystQl.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/CatalystQl.scala
new file mode 100644
index 0000000..d231841
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/CatalystQl.scala
@@ -0,0 +1,1012 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import java.sql.Date
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.Count
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.trees.CurrentOrigin
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.CalendarInterval
+import org.apache.spark.util.random.RandomSampler
+
+
+/**
+ * This class translates SQL to Catalyst [[LogicalPlan]]s or [[Expression]]s.
+ */
+private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends ParserInterface {
+ object Token {
+ def unapply(node: ASTNode): Some[(String, List[ASTNode])] = {
+ CurrentOrigin.setPosition(node.line, node.positionInLine)
+ node.pattern
+ }
+ }
+
+ /**
+ * The safeParse method allows a user to focus on the parsing/AST transformation logic. This
+ * method will take care of possible errors during the parsing process.
+ */
+ protected def safeParse[T](sql: String, ast: ASTNode)(toResult: ASTNode => T): T = {
+ try {
+ toResult(ast)
+ } catch {
+ case e: MatchError => throw e
+ case e: AnalysisException => throw e
+ case e: Exception =>
+ throw new AnalysisException(e.getMessage)
+ case e: NotImplementedError =>
+ throw new AnalysisException(
+ s"""Unsupported language features in query
+ |== SQL ==
+ |$sql
+ |== AST ==
+ |${ast.treeString}
+ |== Error ==
+ |$e
+ |== Stacktrace ==
+ |${e.getStackTrace.head}
+ """.stripMargin)
+ }
+ }
+
+ /** Creates LogicalPlan for a given SQL string. */
+ def parsePlan(sql: String): LogicalPlan =
+ safeParse(sql, ParseDriver.parsePlan(sql, conf))(nodeToPlan)
+
+ /** Creates Expression for a given SQL string. */
+ def parseExpression(sql: String): Expression =
+ safeParse(sql, ParseDriver.parseExpression(sql, conf))(selExprNodeToExpr(_).get)
+
+ /** Creates TableIdentifier for a given SQL string. */
+ def parseTableIdentifier(sql: String): TableIdentifier =
+ safeParse(sql, ParseDriver.parseTableName(sql, conf))(extractTableIdent)
+
+ def parseDdl(sql: String): Seq[Attribute] = {
+ safeParse(sql, ParseDriver.parseExpression(sql, conf)) { ast =>
+ val Token("TOK_CREATETABLE", children) = ast
+ children
+ .find(_.text == "TOK_TABCOLLIST")
+ .getOrElse(sys.error("No columnList!"))
+ .flatMap(_.children.map(nodeToAttribute))
+ }
+ }
+
+ protected def getClauses(
+ clauseNames: Seq[String],
+ nodeList: Seq[ASTNode]): Seq[Option[ASTNode]] = {
+ var remainingNodes = nodeList
+ val clauses = clauseNames.map { clauseName =>
+ val (matches, nonMatches) = remainingNodes.partition(_.text.toUpperCase == clauseName)
+ remainingNodes = nonMatches ++ (if (matches.nonEmpty) matches.tail else Nil)
+ matches.headOption
+ }
+
+ if (remainingNodes.nonEmpty) {
+ sys.error(
+ s"""Unhandled clauses: ${remainingNodes.map(_.treeString).mkString("\n")}.
+ |You are likely trying to use an unsupported Hive feature.""".stripMargin)
+ }
+ clauses
+ }
+
+ protected def getClause(clauseName: String, nodeList: Seq[ASTNode]): ASTNode =
+ getClauseOption(clauseName, nodeList).getOrElse(sys.error(
+ s"Expected clause $clauseName missing from ${nodeList.map(_.treeString).mkString("\n")}"))
+
+ protected def getClauseOption(clauseName: String, nodeList: Seq[ASTNode]): Option[ASTNode] = {
+ nodeList.filter { case ast: ASTNode => ast.text == clauseName } match {
+ case Seq(oneMatch) => Some(oneMatch)
+ case Seq() => None
+ case _ => sys.error(s"Found multiple instances of clause $clauseName")
+ }
+ }
+
+ protected def nodeToAttribute(node: ASTNode): Attribute = node match {
+ case Token("TOK_TABCOL", Token(colName, Nil) :: dataType :: Nil) =>
+ AttributeReference(colName, nodeToDataType(dataType), nullable = true)()
+ case _ =>
+ noParseRule("Attribute", node)
+ }
+
+ protected def nodeToDataType(node: ASTNode): DataType = node match {
+ case Token("TOK_DECIMAL", precision :: scale :: Nil) =>
+ DecimalType(precision.text.toInt, scale.text.toInt)
+ case Token("TOK_DECIMAL", precision :: Nil) =>
+ DecimalType(precision.text.toInt, 0)
+ case Token("TOK_DECIMAL", Nil) => DecimalType.USER_DEFAULT
+ case Token("TOK_BIGINT", Nil) => LongType
+ case Token("TOK_INT", Nil) => IntegerType
+ case Token("TOK_TINYINT", Nil) => ByteType
+ case Token("TOK_SMALLINT", Nil) => ShortType
+ case Token("TOK_BOOLEAN", Nil) => BooleanType
+ case Token("TOK_STRING", Nil) => StringType
+ case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
+ case Token("TOK_CHAR", Token(_, Nil) :: Nil) => StringType
+ case Token("TOK_FLOAT", Nil) => FloatType
+ case Token("TOK_DOUBLE", Nil) => DoubleType
+ case Token("TOK_DATE", Nil) => DateType
+ case Token("TOK_TIMESTAMP", Nil) => TimestampType
+ case Token("TOK_BINARY", Nil) => BinaryType
+ case Token("TOK_LIST", elementType :: Nil) => ArrayType(nodeToDataType(elementType))
+ case Token("TOK_STRUCT", Token("TOK_TABCOLLIST", fields) :: Nil) =>
+ StructType(fields.map(nodeToStructField))
+ case Token("TOK_MAP", keyType :: valueType :: Nil) =>
+ MapType(nodeToDataType(keyType), nodeToDataType(valueType))
+ case _ =>
+ noParseRule("DataType", node)
+ }
+
+ protected def nodeToStructField(node: ASTNode): StructField = node match {
+ case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) =>
+ StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true)
+ case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: comment :: Nil) =>
+ val meta = new MetadataBuilder().putString("comment", unquoteString(comment.text)).build()
+ StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true, meta)
+ case _ =>
+ noParseRule("StructField", node)
+ }
+
+ protected def extractTableIdent(tableNameParts: ASTNode): TableIdentifier = {
+ tableNameParts.children.map {
+ case Token(part, Nil) => cleanIdentifier(part)
+ } match {
+ case Seq(tableOnly) => TableIdentifier(tableOnly)
+ case Seq(databaseName, table) => TableIdentifier(table, Some(databaseName))
+ case other => sys.error("Hive only supports tables names like 'tableName' " +
+ s"or 'databaseName.tableName', found '$other'")
+ }
+ }
+
+ /**
+ * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2))
+ * is equivalent to
+ * SELECT MAX(value) FROM src GROUP BY k1, k2 UNION SELECT MAX(value) FROM src GROUP BY k2
+ * Check the following link for details.
+ *
+https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C+Grouping+and+Rollup
+ *
+ * The bitmask denotes the validity of the grouping expressions for a grouping set; the
+ * bitmask is also called the grouping id (`GROUPING__ID`, the virtual column in Hive).
+ * e.g. in the superset (k1, k2, k3) (bit 2: k1, bit 1: k2, and bit 0: k3), the grouping ids
+ * of GROUPING SETS (k1, k2) and (k2) should be 1 and 5 respectively.
+ */
+ protected def extractGroupingSet(children: Seq[ASTNode]): (Seq[Expression], Seq[Int]) = {
+ val (keyASTs, setASTs) = children.partition {
+ case Token("TOK_GROUPING_SETS_EXPRESSION", _) => false // grouping sets
+ case _ => true // grouping keys
+ }
+
+ val keys = keyASTs.map(nodeToExpr)
+ val keyMap = keyASTs.zipWithIndex.toMap
+
+ val mask = (1 << keys.length) - 1
+ val bitmasks: Seq[Int] = setASTs.map {
+ case Token("TOK_GROUPING_SETS_EXPRESSION", columns) =>
+ columns.foldLeft(mask)((bitmap, col) => {
+ val keyIndex = keyMap.find(_._1.treeEquals(col)).map(_._2).getOrElse(
+ throw new AnalysisException(s"${col.treeString} doesn't show up in the GROUP BY list"))
+ // 0 means that the column at the given index is a grouping column, 1 means it is not,
+ // so we unset the bit in bitmap.
+ bitmap & ~(1 << (keys.length - 1 - keyIndex))
+ })
+ case _ => sys.error("Expect GROUPING SETS clause")
+ }
+
+ (keys, bitmasks)
+ }
+
+ protected def nodeToPlan(node: ASTNode): LogicalPlan = node match {
+ case Token("TOK_SHOWFUNCTIONS", args) =>
+ // Skip LIKE.
+ val pattern = args match {
+ case like :: nodes if like.text.toUpperCase == "LIKE" => nodes
+ case nodes => nodes
+ }
+
+ // Extract Database and Function name
+ pattern match {
+ case Nil =>
+ ShowFunctions(None, None)
+ case Token(name, Nil) :: Nil =>
+ ShowFunctions(None, Some(unquoteString(cleanIdentifier(name))))
+ case Token(db, Nil) :: Token(name, Nil) :: Nil =>
+ ShowFunctions(Some(unquoteString(cleanIdentifier(db))),
+ Some(unquoteString(cleanIdentifier(name))))
+ case _ =>
+ noParseRule("SHOW FUNCTIONS", node)
+ }
+
+ case Token("TOK_DESCFUNCTION", Token(functionName, Nil) :: isExtended) =>
+ DescribeFunction(cleanIdentifier(functionName), isExtended.nonEmpty)
+
+ case Token("TOK_QUERY", queryArgs @ Token("TOK_CTE" | "TOK_FROM" | "TOK_INSERT", _) :: _) =>
+ val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
+ queryArgs match {
+ case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: inserts =>
+ val cteRelations = ctes.map { node =>
+ val relation = nodeToRelation(node).asInstanceOf[SubqueryAlias]
+ relation.alias -> relation
+ }
+ (Some(from.head), inserts, Some(cteRelations.toMap))
+ case Token("TOK_FROM", from) :: inserts =>
+ (Some(from.head), inserts, None)
+ case Token("TOK_INSERT", _) :: Nil =>
+ (None, queryArgs, None)
+ }
+
+ // Return one query for each insert clause.
+ val queries = insertClauses.map {
+ case Token("TOK_INSERT", singleInsert) =>
+ val (
+ intoClause ::
+ destClause ::
+ selectClause ::
+ selectDistinctClause ::
+ whereClause ::
+ groupByClause ::
+ rollupGroupByClause ::
+ cubeGroupByClause ::
+ groupingSetsClause ::
+ orderByClause ::
+ havingClause ::
+ sortByClause ::
+ clusterByClause ::
+ distributeByClause ::
+ limitClause ::
+ lateralViewClause ::
+ windowClause :: Nil) = {
+ getClauses(
+ Seq(
+ "TOK_INSERT_INTO",
+ "TOK_DESTINATION",
+ "TOK_SELECT",
+ "TOK_SELECTDI",
+ "TOK_WHERE",
+ "TOK_GROUPBY",
+ "TOK_ROLLUP_GROUPBY",
+ "TOK_CUBE_GROUPBY",
+ "TOK_GROUPING_SETS",
+ "TOK_ORDERBY",
+ "TOK_HAVING",
+ "TOK_SORTBY",
+ "TOK_CLUSTERBY",
+ "TOK_DISTRIBUTEBY",
+ "TOK_LIMIT",
+ "TOK_LATERAL_VIEW",
+ "WINDOW"),
+ singleInsert)
+ }
+
+ val relations = fromClause match {
+ case Some(f) => nodeToRelation(f)
+ case None => OneRowRelation
+ }
+
+ val withWhere = whereClause.map { whereNode =>
+ val Seq(whereExpr) = whereNode.children
+ Filter(nodeToExpr(whereExpr), relations)
+ }.getOrElse(relations)
+
+ val select = (selectClause orElse selectDistinctClause)
+ .getOrElse(sys.error("No select clause."))
+
+ val transformation = nodeToTransformation(select.children.head, withWhere)
+
+ val withLateralView = lateralViewClause.map { lv =>
+ nodeToGenerate(lv.children.head, outer = false, withWhere)
+ }.getOrElse(withWhere)
+
+ // The projection of the query can either be a normal projection, an aggregation
+ // (if there is a group by) or a script transformation.
+ val withProject: LogicalPlan = transformation.getOrElse {
+ val selectExpressions =
+ select.children.flatMap(selExprNodeToExpr).map(UnresolvedAlias(_))
+ Seq(
+ groupByClause.map(e => e match {
+ case Token("TOK_GROUPBY", children) =>
+ // Not a transformation so must be either project or aggregation.
+ Aggregate(children.map(nodeToExpr), selectExpressions, withLateralView)
+ case _ => sys.error("Expect GROUP BY")
+ }),
+ groupingSetsClause.map(e => e match {
+ case Token("TOK_GROUPING_SETS", children) =>
+ val (groupByExprs, masks) = extractGroupingSet(children)
+ GroupingSets(masks, groupByExprs, withLateralView, selectExpressions)
+ case _ => sys.error("Expect GROUPING SETS")
+ }),
+ rollupGroupByClause.map(e => e match {
+ case Token("TOK_ROLLUP_GROUPBY", children) =>
+ Aggregate(
+ Seq(Rollup(children.map(nodeToExpr))),
+ selectExpressions,
+ withLateralView)
+ case _ => sys.error("Expect WITH ROLLUP")
+ }),
+ cubeGroupByClause.map(e => e match {
+ case Token("TOK_CUBE_GROUPBY", children) =>
+ Aggregate(
+ Seq(Cube(children.map(nodeToExpr))),
+ selectExpressions,
+ withLateralView)
+ case _ => sys.error("Expect WITH CUBE")
+ }),
+ Some(Project(selectExpressions, withLateralView))).flatten.head
+ }
+
+ // Handle HAVING clause.
+ val withHaving = havingClause.map { h =>
+ val havingExpr = h.children match { case Seq(hexpr) => nodeToExpr(hexpr) }
+ // Note that we added a cast to boolean. If the expression itself is already boolean,
+ // the optimizer will get rid of the unnecessary cast.
+ Filter(Cast(havingExpr, BooleanType), withProject)
+ }.getOrElse(withProject)
+
+ // Handle SELECT DISTINCT
+ val withDistinct =
+ if (selectDistinctClause.isDefined) Distinct(withHaving) else withHaving
+
+ // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause.
+ val withSort =
+ (orderByClause, sortByClause, distributeByClause, clusterByClause) match {
+ case (Some(totalOrdering), None, None, None) =>
+ Sort(totalOrdering.children.map(nodeToSortOrder), global = true, withDistinct)
+ case (None, Some(perPartitionOrdering), None, None) =>
+ Sort(
+ perPartitionOrdering.children.map(nodeToSortOrder),
+ global = false, withDistinct)
+ case (None, None, Some(partitionExprs), None) =>
+ RepartitionByExpression(
+ partitionExprs.children.map(nodeToExpr), withDistinct)
+ case (None, Some(perPartitionOrdering), Some(partitionExprs), None) =>
+ Sort(
+ perPartitionOrdering.children.map(nodeToSortOrder), global = false,
+ RepartitionByExpression(
+ partitionExprs.children.map(nodeToExpr),
+ withDistinct))
+ case (None, None, None, Some(clusterExprs)) =>
+ Sort(
+ clusterExprs.children.map(nodeToExpr).map(SortOrder(_, Ascending)),
+ global = false,
+ RepartitionByExpression(
+ clusterExprs.children.map(nodeToExpr),
+ withDistinct))
+ case (None, None, None, None) => withDistinct
+ case _ => sys.error("Unsupported set of ordering / distribution clauses.")
+ }
+
+ val withLimit =
+ limitClause.map(l => nodeToExpr(l.children.head))
+ .map(Limit(_, withSort))
+ .getOrElse(withSort)
+
+ // Collect all window specifications defined in the WINDOW clause.
+ val windowDefinitions = windowClause.map(_.children.collect {
+ case Token("TOK_WINDOWDEF",
+ Token(windowName, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) =>
+ windowName -> nodesToWindowSpecification(spec)
+ }.toMap)
+ // Handle cases like
+ // window w1 as (partition by p_mfgr order by p_name
+ // range between 2 preceding and 2 following),
+ // w2 as w1
+ val resolvedCrossReference = windowDefinitions.map {
+ windowDefMap => windowDefMap.map {
+ case (windowName, WindowSpecReference(other)) =>
+ (windowName, windowDefMap(other).asInstanceOf[WindowSpecDefinition])
+ case o => o.asInstanceOf[(String, WindowSpecDefinition)]
+ }
+ }
+
+ val withWindowDefinitions =
+ resolvedCrossReference.map(WithWindowDefinition(_, withLimit)).getOrElse(withLimit)
+
+ // TOK_INSERT_INTO means to add files to the table.
+ // TOK_DESTINATION means to overwrite the table.
+ val resultDestination =
+ (intoClause orElse destClause).getOrElse(sys.error("No destination found."))
+ val overwrite = intoClause.isEmpty
+ nodeToDest(
+ resultDestination,
+ withWindowDefinitions,
+ overwrite)
+ }
+
+ // If there are multiple INSERTS just UNION them together into one query.
+ val query = if (queries.length == 1) queries.head else Union(queries)
+
+ // Return a With plan if there is a CTE.
+ cteRelations.map(With(query, _)).getOrElse(query)
+
+ case Token("TOK_UNIONALL", left :: right :: Nil) =>
+ Union(nodeToPlan(left), nodeToPlan(right))
+ case Token("TOK_UNIONDISTINCT", left :: right :: Nil) =>
+ Distinct(Union(nodeToPlan(left), nodeToPlan(right)))
+ case Token("TOK_EXCEPT", left :: right :: Nil) =>
+ Except(nodeToPlan(left), nodeToPlan(right))
+ case Token("TOK_INTERSECT", left :: right :: Nil) =>
+ Intersect(nodeToPlan(left), nodeToPlan(right))
+
+ case _ =>
+ noParseRule("Plan", node)
+ }
+
+ val allJoinTokens = "(TOK_.*JOIN)".r
+ val laterViewToken = "TOK_LATERAL_VIEW(.*)".r
+ protected def nodeToRelation(node: ASTNode): LogicalPlan = {
+ node match {
+ case Token("TOK_SUBQUERY", query :: Token(alias, Nil) :: Nil) =>
+ SubqueryAlias(cleanIdentifier(alias), nodeToPlan(query))
+
+ case Token(laterViewToken(isOuter), selectClause :: relationClause :: Nil) =>
+ nodeToGenerate(
+ selectClause,
+ outer = isOuter.nonEmpty,
+ nodeToRelation(relationClause))
+
+ /* All relations, possibly with aliases or sampling clauses. */
+ case Token("TOK_TABREF", clauses) =>
+ // If the last clause is not a token then it's the alias of the table.
+ val (nonAliasClauses, aliasClause) =
+ if (clauses.last.text.startsWith("TOK")) {
+ (clauses, None)
+ } else {
+ (clauses.dropRight(1), Some(clauses.last))
+ }
+
+ val (Some(tableNameParts) ::
+ splitSampleClause ::
+ bucketSampleClause :: Nil) = {
+ getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE", "TOK_TABLEBUCKETSAMPLE"),
+ nonAliasClauses)
+ }
+
+ val tableIdent = extractTableIdent(tableNameParts)
+ val alias = aliasClause.map { case Token(a, Nil) => cleanIdentifier(a) }
+ val relation = UnresolvedRelation(tableIdent, alias)
+
+ // Apply sampling if requested.
+ (bucketSampleClause orElse splitSampleClause).map {
+ case Token("TOK_TABLESPLITSAMPLE",
+ Token("TOK_ROWCOUNT", Nil) :: Token(count, Nil) :: Nil) =>
+ Limit(Literal(count.toInt), relation)
+ case Token("TOK_TABLESPLITSAMPLE",
+ Token("TOK_PERCENT", Nil) :: Token(fraction, Nil) :: Nil) =>
+ // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
+ // function takes X PERCENT as the input and the range of X is [0, 100], we need to
+ // adjust the fraction.
+ require(
+ fraction.toDouble >= (0.0 - RandomSampler.roundingEpsilon)
+ && fraction.toDouble <= (100.0 + RandomSampler.roundingEpsilon),
+ s"Sampling fraction ($fraction) must be on interval [0, 100]")
+ Sample(0.0, fraction.toDouble / 100, withReplacement = false,
+ (math.random * 1000).toInt,
+ relation)(
+ isTableSample = true)
+ case Token("TOK_TABLEBUCKETSAMPLE",
+ Token(numerator, Nil) ::
+ Token(denominator, Nil) :: Nil) =>
+ val fraction = numerator.toDouble / denominator.toDouble
+ Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, relation)(
+ isTableSample = true)
+ case a =>
+ noParseRule("Sampling", a)
+ }.getOrElse(relation)
+
+ case Token(allJoinTokens(joinToken), relation1 :: relation2 :: other) =>
+ if (other.size > 1) {
+ sys.error(s"Unsupported join operation: $other")
+ }
+
+ val joinType = joinToken match {
+ case "TOK_JOIN" => Inner
+ case "TOK_CROSSJOIN" => Inner
+ case "TOK_RIGHTOUTERJOIN" => RightOuter
+ case "TOK_LEFTOUTERJOIN" => LeftOuter
+ case "TOK_FULLOUTERJOIN" => FullOuter
+ case "TOK_LEFTSEMIJOIN" => LeftSemi
+ case "TOK_UNIQUEJOIN" => noParseRule("Unique Join", node)
+ case "TOK_ANTIJOIN" => noParseRule("Anti Join", node)
+ case "TOK_NATURALJOIN" => NaturalJoin(Inner)
+ case "TOK_NATURALRIGHTOUTERJOIN" => NaturalJoin(RightOuter)
+ case "TOK_NATURALLEFTOUTERJOIN" => NaturalJoin(LeftOuter)
+ case "TOK_NATURALFULLOUTERJOIN" => NaturalJoin(FullOuter)
+ }
+ Join(nodeToRelation(relation1),
+ nodeToRelation(relation2),
+ joinType,
+ other.headOption.map(nodeToExpr))
+
+ case _ =>
+ noParseRule("Relation", node)
+ }
+ }
+
+ protected def nodeToSortOrder(node: ASTNode): SortOrder = node match {
+ case Token("TOK_TABSORTCOLNAMEASC", sortExpr :: Nil) =>
+ SortOrder(nodeToExpr(sortExpr), Ascending)
+ case Token("TOK_TABSORTCOLNAMEDESC", sortExpr :: Nil) =>
+ SortOrder(nodeToExpr(sortExpr), Descending)
+ case _ =>
+ noParseRule("SortOrder", node)
+ }
+
+ val destinationToken = "TOK_DESTINATION|TOK_INSERT_INTO".r
+ protected def nodeToDest(
+ node: ASTNode,
+ query: LogicalPlan,
+ overwrite: Boolean): LogicalPlan = node match {
+ case Token(destinationToken(),
+ Token("TOK_DIR",
+ Token("TOK_TMP_FILE", Nil) :: Nil) :: Nil) =>
+ query
+
+ case Token(destinationToken(),
+ Token("TOK_TAB",
+ tableArgs) :: Nil) =>
+ val Some(tableNameParts) :: partitionClause :: Nil =
+ getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
+
+ val tableIdent = extractTableIdent(tableNameParts)
+
+ val partitionKeys = partitionClause.map(_.children.map {
+ // Parse partitions. We also make keys case insensitive.
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
+ cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
+ cleanIdentifier(key.toLowerCase) -> None
+ }.toMap).getOrElse(Map.empty)
+
+ InsertIntoTable(
+ UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = false)
+
+ case Token(destinationToken(),
+ Token("TOK_TAB",
+ tableArgs) ::
+ Token("TOK_IFNOTEXISTS",
+ ifNotExists) :: Nil) =>
+ val Some(tableNameParts) :: partitionClause :: Nil =
+ getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
+
+ val tableIdent = extractTableIdent(tableNameParts)
+
+ val partitionKeys = partitionClause.map(_.children.map {
+ // Parse partitions. We also make keys case insensitive.
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
+ cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
+ case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
+ cleanIdentifier(key.toLowerCase) -> None
+ }.toMap).getOrElse(Map.empty)
+
+ InsertIntoTable(
+ UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = true)
+
+ case _ =>
+ noParseRule("Destination", node)
+ }
+
+ protected def selExprNodeToExpr(node: ASTNode): Option[Expression] = node match {
+ case Token("TOK_SELEXPR", e :: Nil) =>
+ Some(nodeToExpr(e))
+
+ case Token("TOK_SELEXPR", e :: Token(alias, Nil) :: Nil) =>
+ Some(Alias(nodeToExpr(e), cleanIdentifier(alias))())
+
+ case Token("TOK_SELEXPR", e :: aliasChildren) =>
+ val aliasNames = aliasChildren.collect {
+ case Token(name, Nil) => cleanIdentifier(name)
+ }
+ Some(MultiAlias(nodeToExpr(e), aliasNames))
+
+ /* Hints are ignored */
+ case Token("TOK_HINTLIST", _) => None
+
+ case _ =>
+ noParseRule("Select", node)
+ }
+
+ protected val escapedIdentifier = "`(.+)`".r
+ protected val doubleQuotedString = "\"([^\"]+)\"".r
+ protected val singleQuotedString = "'([^']+)'".r
+
+ protected def unquoteString(str: String) = str match {
+ case singleQuotedString(s) => s
+ case doubleQuotedString(s) => s
+ case other => other
+ }
+
+ /** Strips backticks from ident if present */
+ protected def cleanIdentifier(ident: String): String = ident match {
+ case escapedIdentifier(i) => i
+ case plainIdent => plainIdent
+ }
+
+ /* Case insensitive matches */
+ val COUNT = "(?i)COUNT".r
+ val SUM = "(?i)SUM".r
+ val AND = "(?i)AND".r
+ val OR = "(?i)OR".r
+ val NOT = "(?i)NOT".r
+ val TRUE = "(?i)TRUE".r
+ val FALSE = "(?i)FALSE".r
+ val LIKE = "(?i)LIKE".r
+ val RLIKE = "(?i)RLIKE".r
+ val REGEXP = "(?i)REGEXP".r
+ val IN = "(?i)IN".r
+ val DIV = "(?i)DIV".r
+ val BETWEEN = "(?i)BETWEEN".r
+ val WHEN = "(?i)WHEN".r
+ val CASE = "(?i)CASE".r
+
+ val INTEGRAL = "[+-]?\\d+".r
+ val DECIMAL = "[+-]?((\\d+(\\.\\d*)?)|(\\.\\d+))".r
+
+ protected def nodeToExpr(node: ASTNode): Expression = node match {
+ /* Attribute References */
+ case Token("TOK_TABLE_OR_COL", Token(name, Nil) :: Nil) =>
+ UnresolvedAttribute.quoted(cleanIdentifier(name))
+ case Token(".", qualifier :: Token(attr, Nil) :: Nil) =>
+ nodeToExpr(qualifier) match {
+ case UnresolvedAttribute(nameParts) =>
+ UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
+ case other => UnresolvedExtractValue(other, Literal(cleanIdentifier(attr)))
+ }
+ case Token("TOK_SUBQUERY_EXPR", Token("TOK_SUBQUERY_OP", Nil) :: subquery :: Nil) =>
+ ScalarSubquery(nodeToPlan(subquery))
+
+ /* Stars (*) */
+ case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
+ // The format of dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
+ // have a single child, which is tableName.
+ case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", target) :: Nil) if target.nonEmpty =>
+ UnresolvedStar(Some(target.map(x => cleanIdentifier(x.text))))
+
+ /* Aggregate Functions */
+ case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
+ Count(args.map(nodeToExpr)).toAggregateExpression(isDistinct = true)
+ case Token("TOK_FUNCTIONSTAR", Token(COUNT(), Nil) :: Nil) =>
+ Count(Literal(1)).toAggregateExpression()
+
+ /* Casts */
+ case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), StringType)
+ case Token("TOK_FUNCTION", Token("TOK_VARCHAR", _) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), StringType)
+ case Token("TOK_FUNCTION", Token("TOK_CHAR", _) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), StringType)
+ case Token("TOK_FUNCTION", Token("TOK_INT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), IntegerType)
+ case Token("TOK_FUNCTION", Token("TOK_BIGINT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), LongType)
+ case Token("TOK_FUNCTION", Token("TOK_FLOAT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), FloatType)
+ case Token("TOK_FUNCTION", Token("TOK_DOUBLE", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DoubleType)
+ case Token("TOK_FUNCTION", Token("TOK_SMALLINT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), ShortType)
+ case Token("TOK_FUNCTION", Token("TOK_TINYINT", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), ByteType)
+ case Token("TOK_FUNCTION", Token("TOK_BINARY", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), BinaryType)
+ case Token("TOK_FUNCTION", Token("TOK_BOOLEAN", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), BooleanType)
+ case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: scale :: nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, scale.text.toInt))
+ case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, 0))
+ case Token("TOK_FUNCTION", Token("TOK_DECIMAL", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DecimalType.USER_DEFAULT)
+ case Token("TOK_FUNCTION", Token("TOK_TIMESTAMP", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), TimestampType)
+ case Token("TOK_FUNCTION", Token("TOK_DATE", Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DateType)
+
+ /* Arithmetic */
+ case Token("+", child :: Nil) => nodeToExpr(child)
+ case Token("-", child :: Nil) => UnaryMinus(nodeToExpr(child))
+ case Token("~", child :: Nil) => BitwiseNot(nodeToExpr(child))
+ case Token("+", left :: right:: Nil) => Add(nodeToExpr(left), nodeToExpr(right))
+ case Token("-", left :: right:: Nil) => Subtract(nodeToExpr(left), nodeToExpr(right))
+ case Token("*", left :: right:: Nil) => Multiply(nodeToExpr(left), nodeToExpr(right))
+ case Token("/", left :: right:: Nil) => Divide(nodeToExpr(left), nodeToExpr(right))
+ case Token(DIV(), left :: right:: Nil) =>
+ Cast(Divide(nodeToExpr(left), nodeToExpr(right)), LongType)
+ case Token("%", left :: right:: Nil) => Remainder(nodeToExpr(left), nodeToExpr(right))
+ case Token("&", left :: right:: Nil) => BitwiseAnd(nodeToExpr(left), nodeToExpr(right))
+ case Token("|", left :: right:: Nil) => BitwiseOr(nodeToExpr(left), nodeToExpr(right))
+ case Token("^", left :: right:: Nil) => BitwiseXor(nodeToExpr(left), nodeToExpr(right))
+
+ /* Comparisons */
+ case Token("=", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
+ case Token("==", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
+ case Token("<=>", left :: right:: Nil) => EqualNullSafe(nodeToExpr(left), nodeToExpr(right))
+ case Token("!=", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
+ case Token("<>", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
+ case Token(">", left :: right:: Nil) => GreaterThan(nodeToExpr(left), nodeToExpr(right))
+ case Token(">=", left :: right:: Nil) => GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right))
+ case Token("<", left :: right:: Nil) => LessThan(nodeToExpr(left), nodeToExpr(right))
+ case Token("<=", left :: right:: Nil) => LessThanOrEqual(nodeToExpr(left), nodeToExpr(right))
+ case Token(LIKE(), left :: right:: Nil) => Like(nodeToExpr(left), nodeToExpr(right))
+ case Token(RLIKE(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
+ case Token(REGEXP(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
+ case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: Nil) =>
+ IsNotNull(nodeToExpr(child))
+ case Token("TOK_FUNCTION", Token("TOK_ISNULL", Nil) :: child :: Nil) =>
+ IsNull(nodeToExpr(child))
+ case Token("TOK_FUNCTION", Token(IN(), Nil) :: value :: list) =>
+ In(nodeToExpr(value), list.map(nodeToExpr))
+ case Token("TOK_FUNCTION",
+ Token(BETWEEN(), Nil) ::
+ kw ::
+ target ::
+ minValue ::
+ maxValue :: Nil) =>
+
+ val targetExpression = nodeToExpr(target)
+ val betweenExpr =
+ And(
+ GreaterThanOrEqual(targetExpression, nodeToExpr(minValue)),
+ LessThanOrEqual(targetExpression, nodeToExpr(maxValue)))
+ kw match {
+ case Token("KW_FALSE", Nil) => betweenExpr
+ case Token("KW_TRUE", Nil) => Not(betweenExpr)
+ }
+
+ /* Boolean Logic */
+ case Token(AND(), left :: right:: Nil) => And(nodeToExpr(left), nodeToExpr(right))
+ case Token(OR(), left :: right:: Nil) => Or(nodeToExpr(left), nodeToExpr(right))
+ case Token(NOT(), child :: Nil) => Not(nodeToExpr(child))
+ case Token("!", child :: Nil) => Not(nodeToExpr(child))
+
+ /* Case statements */
+ case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) =>
+ CaseWhen.createFromParser(branches.map(nodeToExpr))
+ case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) =>
+ val keyExpr = nodeToExpr(branches.head)
+ CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr))
+
+ /* Complex datatype manipulation */
+ case Token("[", child :: ordinal :: Nil) =>
+ UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal))
+
+ /* Window Functions */
+ case Token(text, args :+ Token("TOK_WINDOWSPEC", spec)) =>
+ val function = nodeToExpr(node.copy(children = node.children.init))
+ nodesToWindowSpecification(spec) match {
+ case reference: WindowSpecReference =>
+ UnresolvedWindowExpression(function, reference)
+ case definition: WindowSpecDefinition =>
+ WindowExpression(function, definition)
+ }
+
+ /* UDFs - must be last, otherwise they will preempt built-in functions */
+ case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>
+ UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = false)
+ // Aggregate function with DISTINCT keyword.
+ case Token("TOK_FUNCTIONDI", Token(name, Nil) :: args) =>
+ UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = true)
+ case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: args) =>
+ UnresolvedFunction(name, UnresolvedStar(None) :: Nil, isDistinct = false)
+
+ /* Literals */
+ case Token("TOK_NULL", Nil) => Literal.create(null, NullType)
+ case Token(TRUE(), Nil) => Literal.create(true, BooleanType)
+ case Token(FALSE(), Nil) => Literal.create(false, BooleanType)
+ case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
+ Literal(strings.map(s => ParseUtils.unescapeSQLString(s.text)).mkString)
+
+ case ast if ast.tokenType == SparkSqlParser.TinyintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toByte, ByteType)
+
+ case ast if ast.tokenType == SparkSqlParser.SmallintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toShort, ShortType)
+
+ case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)
+
+ case ast if ast.tokenType == SparkSqlParser.DoubleLiteral =>
+ Literal(ast.text.toDouble)
+
+ case ast if ast.tokenType == SparkSqlParser.Number =>
+ val text = ast.text
+ text match {
+ case INTEGRAL() =>
+ BigDecimal(text) match {
+ case v if v.isValidInt =>
+ Literal(v.intValue())
+ case v if v.isValidLong =>
+ Literal(v.longValue())
+ case v => Literal(v.underlying())
+ }
+ case DECIMAL(_*) =>
+ Literal(BigDecimal(text).underlying())
+ case _ =>
+ // Convert a scientifically notated decimal into a double.
+ Literal(text.toDouble)
+ }
+ case ast if ast.tokenType == SparkSqlParser.StringLiteral =>
+ Literal(ParseUtils.unescapeSQLString(ast.text))
+
+ case ast if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
+ Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
+
+ case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
+ Literal(CalendarInterval.fromYearMonthString(ast.children.head.text))
+
+ case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
+ Literal(CalendarInterval.fromDayTimeString(ast.children.head.text))
+
+ case Token("TOK_INTERVAL", elements) =>
+ var interval = new CalendarInterval(0, 0)
+ var updated = false
+ elements.foreach {
+ // The interval node will always contain children for all possible time units. A child node
+ // is only useful when it contains exactly one (numeric) child.
+ case e @ Token(name, Token(value, Nil) :: Nil) =>
+ val unit = name match {
+ case "TOK_INTERVAL_YEAR_LITERAL" => "year"
+ case "TOK_INTERVAL_MONTH_LITERAL" => "month"
+ case "TOK_INTERVAL_WEEK_LITERAL" => "week"
+ case "TOK_INTERVAL_DAY_LITERAL" => "day"
+ case "TOK_INTERVAL_HOUR_LITERAL" => "hour"
+ case "TOK_INTERVAL_MINUTE_LITERAL" => "minute"
+ case "TOK_INTERVAL_SECOND_LITERAL" => "second"
+ case "TOK_INTERVAL_MILLISECOND_LITERAL" => "millisecond"
+ case "TOK_INTERVAL_MICROSECOND_LITERAL" => "microsecond"
+ case _ => noParseRule(s"Interval($name)", e)
+ }
+ interval = interval.add(CalendarInterval.fromSingleUnitString(unit, value))
+ updated = true
+ case _ =>
+ }
+ if (!updated) {
+ throw new AnalysisException("at least one time unit should be given for interval literal")
+ }
+ Literal(interval)
+
+ case _ =>
+ noParseRule("Expression", node)
+ }
+
+ /* Case insensitive matches for Window Specification */
+ val PRECEDING = "(?i)preceding".r
+ val FOLLOWING = "(?i)following".r
+ val CURRENT = "(?i)current".r
+ protected def nodesToWindowSpecification(nodes: Seq[ASTNode]): WindowSpec = nodes match {
+ case Token(windowName, Nil) :: Nil =>
+ // Refer to a window spec defined in the window clause.
+ WindowSpecReference(windowName)
+ case Nil =>
+ // OVER()
+ WindowSpecDefinition(
+ partitionSpec = Nil,
+ orderSpec = Nil,
+ frameSpecification = UnspecifiedFrame)
+ case spec =>
+ val (partitionClause :: rowFrame :: rangeFrame :: Nil) =
+ getClauses(
+ Seq(
+ "TOK_PARTITIONINGSPEC",
+ "TOK_WINDOWRANGE",
+ "TOK_WINDOWVALUES"),
+ spec)
+
+ // Handle Partition By and Order By.
+ val (partitionSpec, orderSpec) = partitionClause.map { partitionAndOrdering =>
+ val (partitionByClause :: orderByClause :: sortByClause :: clusterByClause :: Nil) =
+ getClauses(
+ Seq("TOK_DISTRIBUTEBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_CLUSTERBY"),
+ partitionAndOrdering.children)
+
+ (partitionByClause, orderByClause.orElse(sortByClause), clusterByClause) match {
+ case (Some(partitionByExpr), Some(orderByExpr), None) =>
+ (partitionByExpr.children.map(nodeToExpr),
+ orderByExpr.children.map(nodeToSortOrder))
+ case (Some(partitionByExpr), None, None) =>
+ (partitionByExpr.children.map(nodeToExpr), Nil)
+ case (None, Some(orderByExpr), None) =>
+ (Nil, orderByExpr.children.map(nodeToSortOrder))
+ case (None, None, Some(clusterByExpr)) =>
+ val expressions = clusterByExpr.children.map(nodeToExpr)
+ (expressions, expressions.map(SortOrder(_, Ascending)))
+ case _ =>
+ noParseRule("Partition & Ordering", partitionAndOrdering)
+ }
+ }.getOrElse {
+ (Nil, Nil)
+ }
+
+ // Handle Window Frame
+ val windowFrame =
+ if (rowFrame.isEmpty && rangeFrame.isEmpty) {
+ UnspecifiedFrame
+ } else {
+ val frameType = rowFrame.map(_ => RowFrame).getOrElse(RangeFrame)
+ def nodeToBoundary(node: ASTNode): FrameBoundary = node match {
+ case Token(PRECEDING(), Token(count, Nil) :: Nil) =>
+ if (count.toLowerCase() == "unbounded") {
+ UnboundedPreceding
+ } else {
+ ValuePreceding(count.toInt)
+ }
+ case Token(FOLLOWING(), Token(count, Nil) :: Nil) =>
+ if (count.toLowerCase() == "unbounded") {
+ UnboundedFollowing
+ } else {
+ ValueFollowing(count.toInt)
+ }
+ case Token(CURRENT(), Nil) => CurrentRow
+ case _ =>
+ noParseRule("Window Frame Boundary", node)
+ }
+
+ rowFrame.orElse(rangeFrame).map { frame =>
+ frame.children match {
+ case precedingNode :: followingNode :: Nil =>
+ SpecifiedWindowFrame(
+ frameType,
+ nodeToBoundary(precedingNode),
+ nodeToBoundary(followingNode))
+ case precedingNode :: Nil =>
+ SpecifiedWindowFrame(frameType, nodeToBoundary(precedingNode), CurrentRow)
+ case _ =>
+ noParseRule("Window Frame", frame)
+ }
+ }.getOrElse(sys.error(s"If you see this, please file a bug report with your query."))
+ }
+
+ WindowSpecDefinition(partitionSpec, orderSpec, windowFrame)
+ }
+
+ protected def nodeToTransformation(
+ node: ASTNode,
+ child: LogicalPlan): Option[ScriptTransformation] = None
+
+ val explode = "(?i)explode".r
+ val jsonTuple = "(?i)json_tuple".r
+ protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: LogicalPlan): Generate = {
+ val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node
+
+ val alias = cleanIdentifier(getClause("TOK_TABALIAS", clauses).children.head.text)
+
+ val generator = clauses.head match {
+ case Token("TOK_FUNCTION", Token(explode(), Nil) :: childNode :: Nil) =>
+ Explode(nodeToExpr(childNode))
+ case Token("TOK_FUNCTION", Token(jsonTuple(), Nil) :: children) =>
+ JsonTuple(children.map(nodeToExpr))
+ case other =>
+ nodeToGenerator(other)
+ }
+
+ val attributes = clauses.collect {
+ case Token(a, Nil) => UnresolvedAttribute(a.toLowerCase)
+ }
+
+ Generate(generator, join = true, outer = outer, Some(alias.toLowerCase), attributes, child)
+ }
+
+ protected def nodeToGenerator(node: ASTNode): Generator = noParseRule("Generator", node)
+
+ protected def noParseRule(msg: String, node: ASTNode): Nothing = throw new NotImplementedError(
+ s"[$msg]: No parse rules for ASTNode type: ${node.tokenType}, tree:\n${node.treeString}")
+}
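As a sanity check of the bitmask arithmetic described in the extractGroupingSet scaladoc above, here is a small self-contained sketch (a hypothetical helper; plain strings stand in for the real ASTNode grouping expressions) that reproduces the grouping ids 1 and 5 from the (k1, k2, k3) example in that comment:

    // Re-derivation of the bitmap computation in extractGroupingSet: start from the
    // all-ones mask and unset bit (keys.length - 1 - index) for each key in the set.
    def groupingId(keys: Seq[String], set: Seq[String]): Int = {
      val keyIndex = keys.zipWithIndex.toMap
      val mask = (1 << keys.length) - 1
      set.foldLeft(mask) { (bitmap, col) =>
        bitmap & ~(1 << (keys.length - 1 - keyIndex(col)))
      }
    }

    val keys = Seq("k1", "k2", "k3")
    assert(groupingId(keys, Seq("k1", "k2")) == 1) // binary 001: k1 and k2 grouped
    assert(groupingId(keys, Seq("k2")) == 5)       // binary 101: only k2 grouped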
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala
new file mode 100644
index 0000000..21deb82
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import scala.language.implicitConversions
+import scala.util.matching.Regex
+import scala.util.parsing.combinator.syntactical.StandardTokenParsers
+
+import org.apache.spark.sql.types._
+
+/**
+ * This is a data type parser that can be used to parse string representations of data types
+ * provided in SQL queries. This parser is mixed in with DDLParser and SqlParser.
+ */
+private[sql] trait DataTypeParser extends StandardTokenParsers {
+
+ // This is used to create a parser from a regex. We are using regexes for data type strings
+ // since these strings can also be used as column names or field names.
+ import lexical.Identifier
+ implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch(
+ s"identifier matching regex ${regex}",
+ { case Identifier(str) if regex.unapplySeq(str).isDefined => str }
+ )
+
+ protected lazy val primitiveType: Parser[DataType] =
+ "(?i)string".r ^^^ StringType |
+ "(?i)float".r ^^^ FloatType |
+ "(?i)(?:int|integer)".r ^^^ IntegerType |
+ "(?i)tinyint".r ^^^ ByteType |
+ "(?i)smallint".r ^^^ ShortType |
+ "(?i)double".r ^^^ DoubleType |
+ "(?i)(?:bigint|long)".r ^^^ LongType |
+ "(?i)binary".r ^^^ BinaryType |
+ "(?i)boolean".r ^^^ BooleanType |
+ fixedDecimalType |
+ "(?i)decimal".r ^^^ DecimalType.USER_DEFAULT |
+ "(?i)date".r ^^^ DateType |
+ "(?i)timestamp".r ^^^ TimestampType |
+ varchar |
+ char
+
+ protected lazy val fixedDecimalType: Parser[DataType] =
+ ("(?i)decimal".r ~> "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
+ case precision ~ scale =>
+ DecimalType(precision.toInt, scale.toInt)
+ }
+
+ protected lazy val char: Parser[DataType] =
+ "(?i)char".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
+
+ protected lazy val varchar: Parser[DataType] =
+ "(?i)varchar".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
+
+ protected lazy val arrayType: Parser[DataType] =
+ "(?i)array".r ~> "<" ~> dataType <~ ">" ^^ {
+ case tpe => ArrayType(tpe)
+ }
+
+ protected lazy val mapType: Parser[DataType] =
+ "(?i)map".r ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
+ case t1 ~ _ ~ t2 => MapType(t1, t2)
+ }
+
+ protected lazy val structField: Parser[StructField] =
+ ident ~ ":" ~ dataType ^^ {
+ case name ~ _ ~ tpe => StructField(name, tpe, nullable = true)
+ }
+
+ protected lazy val structType: Parser[DataType] =
+ ("(?i)struct".r ~> "<" ~> repsep(structField, ",") <~ ">" ^^ {
+ case fields => new StructType(fields.toArray)
+ }) |
+ ("(?i)struct".r ~ "<>" ^^^ StructType(Nil))
+
+ protected lazy val dataType: Parser[DataType] =
+ arrayType |
+ mapType |
+ structType |
+ primitiveType
+
+ def toDataType(dataTypeString: String): DataType = synchronized {
+ phrase(dataType)(new lexical.Scanner(dataTypeString)) match {
+ case Success(result, _) => result
+ case failure: NoSuccess => throw new DataTypeException(failMessage(dataTypeString))
+ }
+ }
+
+ private def failMessage(dataTypeString: String): String = {
+ s"Unsupported dataType: $dataTypeString. If you have a struct and a field name of it has " +
+ "any special characters, please use backticks (`) to quote that field name, e.g. `x+y`. " +
+ "Please note that backtick itself is not supported in a field name."
+ }
+}
+
+private[sql] object DataTypeParser {
+ lazy val dataTypeParser = new DataTypeParser {
+ override val lexical = new SqlLexical
+ }
+
+ def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString)
+}
+
+/** The exception thrown from the [[DataTypeParser]]. */
+private[sql] class DataTypeException(message: String) extends Exception(message)
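A minimal usage sketch for the relocated parser (editorial illustration, assuming only
the new package path introduced by this diff; note that DataTypeParser is private[sql],
so this only compiles inside that package):

    import org.apache.spark.sql.catalyst.parser.DataTypeParser
    import org.apache.spark.sql.types._

    // Nested SQL type strings parse into the corresponding Catalyst DataType;
    // array/map nullability flags default to true, as the test suite below asserts.
    val t = DataTypeParser.parse("map<int, array<decimal(10,2)>>")
    assert(t == MapType(IntegerType, ArrayType(DecimalType(10, 2))))
    // DataTypeParser.parse("not a type")  // throws DataTypeException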
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/LegacyTypeStringParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/LegacyTypeStringParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/LegacyTypeStringParser.scala
new file mode 100644
index 0000000..60d7361
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/LegacyTypeStringParser.scala
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import scala.util.parsing.combinator.RegexParsers
+
+import org.apache.spark.sql.types._
+
+/**
+ * Parser that turns case class strings into datatypes. This is only here to maintain compatibility
+ * with Parquet files written by Spark 1.1 and below.
+ */
+object LegacyTypeStringParser extends RegexParsers {
+
+ protected lazy val primitiveType: Parser[DataType] =
+ ( "StringType" ^^^ StringType
+ | "FloatType" ^^^ FloatType
+ | "IntegerType" ^^^ IntegerType
+ | "ByteType" ^^^ ByteType
+ | "ShortType" ^^^ ShortType
+ | "DoubleType" ^^^ DoubleType
+ | "LongType" ^^^ LongType
+ | "BinaryType" ^^^ BinaryType
+ | "BooleanType" ^^^ BooleanType
+ | "DateType" ^^^ DateType
+ | "DecimalType()" ^^^ DecimalType.USER_DEFAULT
+ | fixedDecimalType
+ | "TimestampType" ^^^ TimestampType
+ )
+
+ protected lazy val fixedDecimalType: Parser[DataType] =
+ ("DecimalType(" ~> "[0-9]+".r) ~ ("," ~> "[0-9]+".r <~ ")") ^^ {
+ case precision ~ scale => DecimalType(precision.toInt, scale.toInt)
+ }
+
+ protected lazy val arrayType: Parser[DataType] =
+ "ArrayType" ~> "(" ~> dataType ~ "," ~ boolVal <~ ")" ^^ {
+ case tpe ~ _ ~ containsNull => ArrayType(tpe, containsNull)
+ }
+
+ protected lazy val mapType: Parser[DataType] =
+ "MapType" ~> "(" ~> dataType ~ "," ~ dataType ~ "," ~ boolVal <~ ")" ^^ {
+ case t1 ~ _ ~ t2 ~ _ ~ valueContainsNull => MapType(t1, t2, valueContainsNull)
+ }
+
+ protected lazy val structField: Parser[StructField] =
+ ("StructField(" ~> "[a-zA-Z0-9_]*".r) ~ ("," ~> dataType) ~ ("," ~> boolVal <~ ")") ^^ {
+ case name ~ tpe ~ nullable =>
+ StructField(name, tpe, nullable = nullable)
+ }
+
+ protected lazy val boolVal: Parser[Boolean] =
+ ( "true" ^^^ true
+ | "false" ^^^ false
+ )
+
+ protected lazy val structType: Parser[DataType] =
+ "StructType\\([A-zA-z]*\\(".r ~> repsep(structField, ",") <~ "))" ^^ {
+ case fields => StructType(fields)
+ }
+
+ protected lazy val dataType: Parser[DataType] =
+ ( arrayType
+ | mapType
+ | structType
+ | primitiveType
+ )
+
+ /**
+ * Parses a string representation of a DataType.
+ */
+ def parse(asString: String): DataType = parseAll(dataType, asString) match {
+ case Success(result, _) => result
+ case failure: NoSuccess =>
+ throw new IllegalArgumentException(s"Unsupported dataType: $asString, $failure")
+ }
+}
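A sketch of the legacy string format this parser accepts, i.e. the old case-class
rendering of a schema that Spark 1.1-era Parquet metadata recorded (the example
string is illustrative):

    import org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser
    import org.apache.spark.sql.types._

    val legacy =
      "StructType(List(StructField(a,IntegerType,true)," +
        "StructField(b,ArrayType(DoubleType,true),false)))"
    // Yields a StructType with a nullable int field `a` and a non-nullable
    // array<double> field `b`.
    val schema: DataType = LegacyTypeStringParser.parse(legacy)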
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
new file mode 100644
index 0000000..7f35d65
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+/**
+ * Interface for a parser.
+ */
+trait ParserInterface {
+ /** Creates LogicalPlan for a given SQL string. */
+ def parsePlan(sqlText: String): LogicalPlan
+
+ /** Creates Expression for a given SQL string. */
+ def parseExpression(sqlText: String): Expression
+
+ /** Creates TableIdentifier for a given SQL string. */
+ def parseTableIdentifier(sqlText: String): TableIdentifier
+}
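Since ParserInterface is the seam between callers and concrete parsers such as
CatalystQl, callers can stay implementation-agnostic; a brief sketch (the helper
name is hypothetical):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.parser.ParserInterface

    // Accepts any ParserInterface implementation, e.g. new CatalystQl().
    def resolveTable(parser: ParserInterface, sql: String): TableIdentifier =
      parser.parseTableIdentifier(sql)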
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DataTypeParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DataTypeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DataTypeParser.scala
deleted file mode 100644
index 515c071..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DataTypeParser.scala
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.util
-
-import scala.language.implicitConversions
-import scala.util.matching.Regex
-import scala.util.parsing.combinator.syntactical.StandardTokenParsers
-
-import org.apache.spark.sql.catalyst.SqlLexical
-import org.apache.spark.sql.types._
-
-/**
- * This is a data type parser that can be used to parse string representations of data types
- * provided in SQL queries. This parser is mixed in with DDLParser and SqlParser.
- */
-private[sql] trait DataTypeParser extends StandardTokenParsers {
-
- // This is used to create a parser from a regex. We are using regexes for data type strings
- // since these strings can also be used as column names or field names.
- import lexical.Identifier
- implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch(
- s"identifier matching regex ${regex}",
- { case Identifier(str) if regex.unapplySeq(str).isDefined => str }
- )
-
- protected lazy val primitiveType: Parser[DataType] =
- "(?i)string".r ^^^ StringType |
- "(?i)float".r ^^^ FloatType |
- "(?i)(?:int|integer)".r ^^^ IntegerType |
- "(?i)tinyint".r ^^^ ByteType |
- "(?i)smallint".r ^^^ ShortType |
- "(?i)double".r ^^^ DoubleType |
- "(?i)(?:bigint|long)".r ^^^ LongType |
- "(?i)binary".r ^^^ BinaryType |
- "(?i)boolean".r ^^^ BooleanType |
- fixedDecimalType |
- "(?i)decimal".r ^^^ DecimalType.USER_DEFAULT |
- "(?i)date".r ^^^ DateType |
- "(?i)timestamp".r ^^^ TimestampType |
- varchar |
- char
-
- protected lazy val fixedDecimalType: Parser[DataType] =
- ("(?i)decimal".r ~> "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
- case precision ~ scale =>
- DecimalType(precision.toInt, scale.toInt)
- }
-
- protected lazy val char: Parser[DataType] =
- "(?i)char".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
-
- protected lazy val varchar: Parser[DataType] =
- "(?i)varchar".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
-
- protected lazy val arrayType: Parser[DataType] =
- "(?i)array".r ~> "<" ~> dataType <~ ">" ^^ {
- case tpe => ArrayType(tpe)
- }
-
- protected lazy val mapType: Parser[DataType] =
- "(?i)map".r ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
- case t1 ~ _ ~ t2 => MapType(t1, t2)
- }
-
- protected lazy val structField: Parser[StructField] =
- ident ~ ":" ~ dataType ^^ {
- case name ~ _ ~ tpe => StructField(name, tpe, nullable = true)
- }
-
- protected lazy val structType: Parser[DataType] =
- ("(?i)struct".r ~> "<" ~> repsep(structField, ",") <~ ">" ^^ {
- case fields => new StructType(fields.toArray)
- }) |
- ("(?i)struct".r ~ "<>" ^^^ StructType(Nil))
-
- protected lazy val dataType: Parser[DataType] =
- arrayType |
- mapType |
- structType |
- primitiveType
-
- def toDataType(dataTypeString: String): DataType = synchronized {
- phrase(dataType)(new lexical.Scanner(dataTypeString)) match {
- case Success(result, _) => result
- case failure: NoSuccess => throw new DataTypeException(failMessage(dataTypeString))
- }
- }
-
- private def failMessage(dataTypeString: String): String = {
- s"Unsupported dataType: $dataTypeString. If you have a struct and a field name of it has " +
- "any special characters, please use backticks (`) to quote that field name, e.g. `x+y`. " +
- "Please note that backtick itself is not supported in a field name."
- }
-}
-
-private[sql] object DataTypeParser {
- lazy val dataTypeParser = new DataTypeParser {
- override val lexical = new SqlLexical
- }
-
- def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString)
-}
-
-/** The exception thrown from the [[DataTypeParser]]. */
-private[sql] class DataTypeException(message: String) extends Exception(message)
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/LegacyTypeStringParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/LegacyTypeStringParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/LegacyTypeStringParser.scala
deleted file mode 100644
index e27cf9c..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/LegacyTypeStringParser.scala
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.util
-
-import scala.util.parsing.combinator.RegexParsers
-
-import org.apache.spark.sql.types._
-
-/**
- * Parser that turns case class strings into datatypes. This is only here to maintain compatibility
- * with Parquet files written by Spark 1.1 and below.
- */
-object LegacyTypeStringParser extends RegexParsers {
-
- protected lazy val primitiveType: Parser[DataType] =
- ( "StringType" ^^^ StringType
- | "FloatType" ^^^ FloatType
- | "IntegerType" ^^^ IntegerType
- | "ByteType" ^^^ ByteType
- | "ShortType" ^^^ ShortType
- | "DoubleType" ^^^ DoubleType
- | "LongType" ^^^ LongType
- | "BinaryType" ^^^ BinaryType
- | "BooleanType" ^^^ BooleanType
- | "DateType" ^^^ DateType
- | "DecimalType()" ^^^ DecimalType.USER_DEFAULT
- | fixedDecimalType
- | "TimestampType" ^^^ TimestampType
- )
-
- protected lazy val fixedDecimalType: Parser[DataType] =
- ("DecimalType(" ~> "[0-9]+".r) ~ ("," ~> "[0-9]+".r <~ ")") ^^ {
- case precision ~ scale => DecimalType(precision.toInt, scale.toInt)
- }
-
- protected lazy val arrayType: Parser[DataType] =
- "ArrayType" ~> "(" ~> dataType ~ "," ~ boolVal <~ ")" ^^ {
- case tpe ~ _ ~ containsNull => ArrayType(tpe, containsNull)
- }
-
- protected lazy val mapType: Parser[DataType] =
- "MapType" ~> "(" ~> dataType ~ "," ~ dataType ~ "," ~ boolVal <~ ")" ^^ {
- case t1 ~ _ ~ t2 ~ _ ~ valueContainsNull => MapType(t1, t2, valueContainsNull)
- }
-
- protected lazy val structField: Parser[StructField] =
- ("StructField(" ~> "[a-zA-Z0-9_]*".r) ~ ("," ~> dataType) ~ ("," ~> boolVal <~ ")") ^^ {
- case name ~ tpe ~ nullable =>
- StructField(name, tpe, nullable = nullable)
- }
-
- protected lazy val boolVal: Parser[Boolean] =
- ( "true" ^^^ true
- | "false" ^^^ false
- )
-
- protected lazy val structType: Parser[DataType] =
- "StructType\\([A-zA-z]*\\(".r ~> repsep(structField, ",") <~ "))" ^^ {
- case fields => StructType(fields)
- }
-
- protected lazy val dataType: Parser[DataType] =
- ( arrayType
- | mapType
- | structType
- | primitiveType
- )
-
- /**
- * Parses a string representation of a DataType.
- */
- def parse(asString: String): DataType = parseAll(dataType, asString) match {
- case Success(result, _) => result
- case failure: NoSuccess =>
- throw new IllegalArgumentException(s"Unsupported dataType: $asString, $failure")
- }
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 271ca95..1238eef 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -25,7 +25,8 @@ import org.json4s.JsonDSL._
import org.apache.spark.SparkException
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering}
-import org.apache.spark.sql.catalyst.util.{quoteIdentifier, DataTypeParser, LegacyTypeStringParser}
+import org.apache.spark.sql.catalyst.parser.{DataTypeParser, LegacyTypeStringParser}
+import org.apache.spark.sql.catalyst.util.quoteIdentifier
/**
* :: DeveloperApi ::
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
deleted file mode 100644
index 53a8d6e..0000000
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.unsafe.types.CalendarInterval
-
-class CatalystQlSuite extends PlanTest {
- val parser = new CatalystQl()
-
- test("test case insensitive") {
- val result = Project(UnresolvedAlias(Literal(1)):: Nil, OneRowRelation)
- assert(result === parser.parsePlan("seLect 1"))
- assert(result === parser.parsePlan("select 1"))
- assert(result === parser.parsePlan("SELECT 1"))
- }
-
- test("test NOT operator with comparison operations") {
- val parsed = parser.parsePlan("SELECT NOT TRUE > TRUE")
- val expected = Project(
- UnresolvedAlias(
- Not(
- GreaterThan(Literal(true), Literal(true)))
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- test("test Union Distinct operator") {
- val parsed1 = parser.parsePlan("SELECT * FROM t0 UNION SELECT * FROM t1")
- val parsed2 = parser.parsePlan("SELECT * FROM t0 UNION DISTINCT SELECT * FROM t1")
- val expected =
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- SubqueryAlias("u_1",
- Distinct(
- Union(
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- UnresolvedRelation(TableIdentifier("t0"), None)),
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- UnresolvedRelation(TableIdentifier("t1"), None))))))
- comparePlans(parsed1, expected)
- comparePlans(parsed2, expected)
- }
-
- test("test Union All operator") {
- val parsed = parser.parsePlan("SELECT * FROM t0 UNION ALL SELECT * FROM t1")
- val expected =
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- SubqueryAlias("u_1",
- Union(
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- UnresolvedRelation(TableIdentifier("t0"), None)),
- Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
- UnresolvedRelation(TableIdentifier("t1"), None)))))
- comparePlans(parsed, expected)
- }
-
- test("support hive interval literal") {
- def checkInterval(sql: String, result: CalendarInterval): Unit = {
- val parsed = parser.parsePlan(sql)
- val expected = Project(
- UnresolvedAlias(
- Literal(result)
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- def checkYearMonth(lit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' YEAR TO MONTH",
- CalendarInterval.fromYearMonthString(lit))
- }
-
- def checkDayTime(lit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' DAY TO SECOND",
- CalendarInterval.fromDayTimeString(lit))
- }
-
- def checkSingleUnit(lit: String, unit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' $unit",
- CalendarInterval.fromSingleUnitString(unit, lit))
- }
-
- checkYearMonth("123-10")
- checkYearMonth("496-0")
- checkYearMonth("-2-3")
- checkYearMonth("-123-0")
-
- checkDayTime("99 11:22:33.123456789")
- checkDayTime("-99 11:22:33.123456789")
- checkDayTime("10 9:8:7.123456789")
- checkDayTime("1 0:0:0")
- checkDayTime("-1 0:0:0")
- checkDayTime("1 0:0:1")
-
- for (unit <- Seq("year", "month", "day", "hour", "minute", "second")) {
- checkSingleUnit("7", unit)
- checkSingleUnit("-7", unit)
- checkSingleUnit("0", unit)
- }
-
- checkSingleUnit("13.123456789", "second")
- checkSingleUnit("-13.123456789", "second")
- }
-
- test("support scientific notation") {
- def assertRight(input: String, output: Double): Unit = {
- val parsed = parser.parsePlan("SELECT " + input)
- val expected = Project(
- UnresolvedAlias(
- Literal(output)
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- assertRight("9.0e1", 90)
- assertRight(".9e+2", 90)
- assertRight("0.9e+2", 90)
- assertRight("900e-1", 90)
- assertRight("900.0E-1", 90)
- assertRight("9.e+1", 90)
-
- intercept[AnalysisException](parser.parsePlan("SELECT .e3"))
- }
-
- test("parse expressions") {
- compareExpressions(
- parser.parseExpression("prinln('hello', 'world')"),
- UnresolvedFunction(
- "prinln", Literal("hello") :: Literal("world") :: Nil, false))
-
- compareExpressions(
- parser.parseExpression("1 + r.r As q"),
- Alias(Add(Literal(1), UnresolvedAttribute("r.r")), "q")())
-
- compareExpressions(
- parser.parseExpression("1 - f('o', o(bar))"),
- Subtract(Literal(1),
- UnresolvedFunction("f",
- Literal("o") ::
- UnresolvedFunction("o", UnresolvedAttribute("bar") :: Nil, false) ::
- Nil, false)))
-
- intercept[AnalysisException](parser.parseExpression("1 - f('o', o(bar)) hello * world"))
- }
-
- test("table identifier") {
- assert(TableIdentifier("q") === parser.parseTableIdentifier("q"))
- assert(TableIdentifier("q", Some("d")) === parser.parseTableIdentifier("d.q"))
- intercept[AnalysisException](parser.parseTableIdentifier(""))
- intercept[AnalysisException](parser.parseTableIdentifier("d.q.g"))
- }
-
- test("parse union/except/intersect") {
- parser.parsePlan("select * from t1 union all select * from t2")
- parser.parsePlan("select * from t1 union distinct select * from t2")
- parser.parsePlan("select * from t1 union select * from t2")
- parser.parsePlan("select * from t1 except select * from t2")
- parser.parsePlan("select * from t1 intersect select * from t2")
- parser.parsePlan("(select * from t1) union all (select * from t2)")
- parser.parsePlan("(select * from t1) union distinct (select * from t2)")
- parser.parsePlan("(select * from t1) union (select * from t2)")
- parser.parsePlan("select * from ((select * from t1) union (select * from t2)) t")
- }
-
- test("window function: better support of parentheses") {
- parser.parsePlan("select sum(product + 1) over (partition by ((1) + (product / 2)) " +
- "order by 2) from windowData")
- parser.parsePlan("select sum(product + 1) over (partition by (1 + (product / 2)) " +
- "order by 2) from windowData")
- parser.parsePlan("select sum(product + 1) over (partition by ((product / 2) + 1) " +
- "order by 2) from windowData")
-
- parser.parsePlan("select sum(product + 1) over (partition by ((product) + (1)) order by 2) " +
- "from windowData")
- parser.parsePlan("select sum(product + 1) over (partition by ((product) + 1) order by 2) " +
- "from windowData")
- parser.parsePlan("select sum(product + 1) over (partition by (product + (1)) order by 2) " +
- "from windowData")
- }
-
- test("subquery") {
- parser.parsePlan("select (select max(b) from s) ss from t")
- parser.parsePlan("select * from t where a = (select b from s)")
- parser.parsePlan("select * from t group by g having a > (select b from s)")
- }
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
new file mode 100644
index 0000000..0660791
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.PlanTest
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.unsafe.types.CalendarInterval
+
+class CatalystQlSuite extends PlanTest {
+ val parser = new CatalystQl()
+
+ test("test case insensitive") {
+ val result = Project(UnresolvedAlias(Literal(1)):: Nil, OneRowRelation)
+ assert(result === parser.parsePlan("seLect 1"))
+ assert(result === parser.parsePlan("select 1"))
+ assert(result === parser.parsePlan("SELECT 1"))
+ }
+
+ test("test NOT operator with comparison operations") {
+ val parsed = parser.parsePlan("SELECT NOT TRUE > TRUE")
+ val expected = Project(
+ UnresolvedAlias(
+ Not(
+ GreaterThan(Literal(true), Literal(true)))
+ ) :: Nil,
+ OneRowRelation)
+ comparePlans(parsed, expected)
+ }
+
+ test("test Union Distinct operator") {
+ val parsed1 = parser.parsePlan("SELECT * FROM t0 UNION SELECT * FROM t1")
+ val parsed2 = parser.parsePlan("SELECT * FROM t0 UNION DISTINCT SELECT * FROM t1")
+ val expected =
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ SubqueryAlias("u_1",
+ Distinct(
+ Union(
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t0"), None)),
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t1"), None))))))
+ comparePlans(parsed1, expected)
+ comparePlans(parsed2, expected)
+ }
+
+ test("test Union All operator") {
+ val parsed = parser.parsePlan("SELECT * FROM t0 UNION ALL SELECT * FROM t1")
+ val expected =
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ SubqueryAlias("u_1",
+ Union(
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t0"), None)),
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t1"), None)))))
+ comparePlans(parsed, expected)
+ }
+
+ test("support hive interval literal") {
+ def checkInterval(sql: String, result: CalendarInterval): Unit = {
+ val parsed = parser.parsePlan(sql)
+ val expected = Project(
+ UnresolvedAlias(
+ Literal(result)
+ ) :: Nil,
+ OneRowRelation)
+ comparePlans(parsed, expected)
+ }
+
+ def checkYearMonth(lit: String): Unit = {
+ checkInterval(
+ s"SELECT INTERVAL '$lit' YEAR TO MONTH",
+ CalendarInterval.fromYearMonthString(lit))
+ }
+
+ def checkDayTime(lit: String): Unit = {
+ checkInterval(
+ s"SELECT INTERVAL '$lit' DAY TO SECOND",
+ CalendarInterval.fromDayTimeString(lit))
+ }
+
+ def checkSingleUnit(lit: String, unit: String): Unit = {
+ checkInterval(
+ s"SELECT INTERVAL '$lit' $unit",
+ CalendarInterval.fromSingleUnitString(unit, lit))
+ }
+
+ checkYearMonth("123-10")
+ checkYearMonth("496-0")
+ checkYearMonth("-2-3")
+ checkYearMonth("-123-0")
+
+ checkDayTime("99 11:22:33.123456789")
+ checkDayTime("-99 11:22:33.123456789")
+ checkDayTime("10 9:8:7.123456789")
+ checkDayTime("1 0:0:0")
+ checkDayTime("-1 0:0:0")
+ checkDayTime("1 0:0:1")
+
+ for (unit <- Seq("year", "month", "day", "hour", "minute", "second")) {
+ checkSingleUnit("7", unit)
+ checkSingleUnit("-7", unit)
+ checkSingleUnit("0", unit)
+ }
+
+ checkSingleUnit("13.123456789", "second")
+ checkSingleUnit("-13.123456789", "second")
+ }
+
+ test("support scientific notation") {
+ def assertRight(input: String, output: Double): Unit = {
+ val parsed = parser.parsePlan("SELECT " + input)
+ val expected = Project(
+ UnresolvedAlias(
+ Literal(output)
+ ) :: Nil,
+ OneRowRelation)
+ comparePlans(parsed, expected)
+ }
+
+ assertRight("9.0e1", 90)
+ assertRight(".9e+2", 90)
+ assertRight("0.9e+2", 90)
+ assertRight("900e-1", 90)
+ assertRight("900.0E-1", 90)
+ assertRight("9.e+1", 90)
+
+ intercept[AnalysisException](parser.parsePlan("SELECT .e3"))
+ }
+
+ test("parse expressions") {
+ compareExpressions(
+ parser.parseExpression("prinln('hello', 'world')"),
+ UnresolvedFunction(
+ "prinln", Literal("hello") :: Literal("world") :: Nil, false))
+
+ compareExpressions(
+ parser.parseExpression("1 + r.r As q"),
+ Alias(Add(Literal(1), UnresolvedAttribute("r.r")), "q")())
+
+ compareExpressions(
+ parser.parseExpression("1 - f('o', o(bar))"),
+ Subtract(Literal(1),
+ UnresolvedFunction("f",
+ Literal("o") ::
+ UnresolvedFunction("o", UnresolvedAttribute("bar") :: Nil, false) ::
+ Nil, false)))
+
+ intercept[AnalysisException](parser.parseExpression("1 - f('o', o(bar)) hello * world"))
+ }
+
+ test("table identifier") {
+ assert(TableIdentifier("q") === parser.parseTableIdentifier("q"))
+ assert(TableIdentifier("q", Some("d")) === parser.parseTableIdentifier("d.q"))
+ intercept[AnalysisException](parser.parseTableIdentifier(""))
+ intercept[AnalysisException](parser.parseTableIdentifier("d.q.g"))
+ }
+
+ test("parse union/except/intersect") {
+ parser.parsePlan("select * from t1 union all select * from t2")
+ parser.parsePlan("select * from t1 union distinct select * from t2")
+ parser.parsePlan("select * from t1 union select * from t2")
+ parser.parsePlan("select * from t1 except select * from t2")
+ parser.parsePlan("select * from t1 intersect select * from t2")
+ parser.parsePlan("(select * from t1) union all (select * from t2)")
+ parser.parsePlan("(select * from t1) union distinct (select * from t2)")
+ parser.parsePlan("(select * from t1) union (select * from t2)")
+ parser.parsePlan("select * from ((select * from t1) union (select * from t2)) t")
+ }
+
+ test("window function: better support of parentheses") {
+ parser.parsePlan("select sum(product + 1) over (partition by ((1) + (product / 2)) " +
+ "order by 2) from windowData")
+ parser.parsePlan("select sum(product + 1) over (partition by (1 + (product / 2)) " +
+ "order by 2) from windowData")
+ parser.parsePlan("select sum(product + 1) over (partition by ((product / 2) + 1) " +
+ "order by 2) from windowData")
+
+ parser.parsePlan("select sum(product + 1) over (partition by ((product) + (1)) order by 2) " +
+ "from windowData")
+ parser.parsePlan("select sum(product + 1) over (partition by ((product) + 1) order by 2) " +
+ "from windowData")
+ parser.parsePlan("select sum(product + 1) over (partition by (product + (1)) order by 2) " +
+ "from windowData")
+ }
+
+ test("subquery") {
+ parser.parsePlan("select (select max(b) from s) ss from t")
+ parser.parsePlan("select * from t where a = (select b from s)")
+ parser.parsePlan("select * from t group by g having a > (select b from s)")
+ }
+}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
new file mode 100644
index 0000000..7d36080
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -0,0 +1,123 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.types._
+
+class DataTypeParserSuite extends SparkFunSuite {
+
+ def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
+ test(s"parse ${dataTypeString.replace("\n", "")}") {
+ assert(DataTypeParser.parse(dataTypeString) === expectedDataType)
+ }
+ }
+
+ def unsupported(dataTypeString: String): Unit = {
+ test(s"$dataTypeString is not supported") {
+ intercept[DataTypeException](DataTypeParser.parse(dataTypeString))
+ }
+ }
+
+ checkDataType("int", IntegerType)
+ checkDataType("integer", IntegerType)
+ checkDataType("BooLean", BooleanType)
+ checkDataType("tinYint", ByteType)
+ checkDataType("smallINT", ShortType)
+ checkDataType("INT", IntegerType)
+ checkDataType("INTEGER", IntegerType)
+ checkDataType("bigint", LongType)
+ checkDataType("float", FloatType)
+ checkDataType("dOUBle", DoubleType)
+ checkDataType("decimal(10, 5)", DecimalType(10, 5))
+ checkDataType("decimal", DecimalType.USER_DEFAULT)
+ checkDataType("DATE", DateType)
+ checkDataType("timestamp", TimestampType)
+ checkDataType("string", StringType)
+ checkDataType("ChaR(5)", StringType)
+ checkDataType("varchAr(20)", StringType)
+ checkDataType("cHaR(27)", StringType)
+ checkDataType("BINARY", BinaryType)
+
+ checkDataType("array<doublE>", ArrayType(DoubleType, true))
+ checkDataType("Array<map<int, tinYint>>", ArrayType(MapType(IntegerType, ByteType, true), true))
+ checkDataType(
+ "array<struct<tinYint:tinyint>>",
+ ArrayType(StructType(StructField("tinYint", ByteType, true) :: Nil), true)
+ )
+ checkDataType("MAP<int, STRING>", MapType(IntegerType, StringType, true))
+ checkDataType("MAp<int, ARRAY<double>>", MapType(IntegerType, ArrayType(DoubleType), true))
+ checkDataType(
+ "MAP<int, struct<varchar:string>>",
+ MapType(IntegerType, StructType(StructField("varchar", StringType, true) :: Nil), true)
+ )
+
+ checkDataType(
+ "struct<intType: int, ts:timestamp>",
+ StructType(
+ StructField("intType", IntegerType, true) ::
+ StructField("ts", TimestampType, true) :: Nil)
+ )
+ // It is fine to use the data type string as the column name.
+ checkDataType(
+ "Struct<int: int, timestamp:timestamp>",
+ StructType(
+ StructField("int", IntegerType, true) ::
+ StructField("timestamp", TimestampType, true) :: Nil)
+ )
+ checkDataType(
+ """
+ |struct<
+ | struct:struct<deciMal:DECimal, anotherDecimal:decimAL(5,2)>,
+ | MAP:Map<timestamp, varchar(10)>,
+ | arrAy:Array<double>,
+ | anotherArray:Array<char(9)>>
+ """.stripMargin,
+ StructType(
+ StructField("struct",
+ StructType(
+ StructField("deciMal", DecimalType.USER_DEFAULT, true) ::
+ StructField("anotherDecimal", DecimalType(5, 2), true) :: Nil), true) ::
+ StructField("MAP", MapType(TimestampType, StringType), true) ::
+ StructField("arrAy", ArrayType(DoubleType, true), true) ::
+ StructField("anotherArray", ArrayType(StringType, true), true) :: Nil)
+ )
+ // A column name can be a reserved word in our DDL parser and SqlParser.
+ checkDataType(
+ "Struct<TABLE: string, CASE:boolean>",
+ StructType(
+ StructField("TABLE", StringType, true) ::
+ StructField("CASE", BooleanType, true) :: Nil)
+ )
+ // Use backticks to quote column names having special characters.
+ checkDataType(
+ "struct<`x+y`:int, `!@#$%^&*()`:string, `1_2.345<>:\"`:varchar(20)>",
+ StructType(
+ StructField("x+y", IntegerType, true) ::
+ StructField("!@#$%^&*()", StringType, true) ::
+ StructField("1_2.345<>:\"", StringType, true) :: Nil)
+ )
+ // Empty struct.
+ checkDataType("strUCt<>", StructType(Nil))
+
+ unsupported("it is not a data type")
+ unsupported("struct<x+y: int, 1.1:timestamp>")
+ unsupported("struct<x: int")
+ unsupported("struct<x int, y string>")
+ unsupported("struct<`x``y` int>")
+}
[3/3] spark git commit: [SPARK-13633][SQL] Move things into catalyst.parser package
Posted by an...@apache.org.
[SPARK-13633][SQL] Move things into catalyst.parser package
## What changes were proposed in this pull request?
This patch simply moves things to existing package `o.a.s.sql.catalyst.parser` in an effort to reduce the size of the diff in #11048. This is conceptually the same as a recently merged patch #11482.
## How was this patch tested?
Jenkins.
Author: Andrew Or <an...@databricks.com>
Closes #11506 from andrewor14/parser-package.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b7d41474
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b7d41474
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b7d41474
Branch: refs/heads/master
Commit: b7d41474216787e9cd38c04a15c43d5d02f02f93
Parents: 83302c3
Author: Andrew Or <an...@databricks.com>
Authored: Fri Mar 4 10:32:00 2016 -0800
Committer: Andrew Or <an...@databricks.com>
Committed: Fri Mar 4 10:32:00 2016 -0800
----------------------------------------------------------------------
.../sql/catalyst/AbstractSparkSQLParser.scala | 145 ---
.../apache/spark/sql/catalyst/CatalystQl.scala | 1010 -----------------
.../spark/sql/catalyst/ParserInterface.scala | 35 -
.../parser/AbstractSparkSQLParser.scala | 145 +++
.../spark/sql/catalyst/parser/CatalystQl.scala | 1012 ++++++++++++++++++
.../sql/catalyst/parser/DataTypeParser.scala | 119 ++
.../parser/LegacyTypeStringParser.scala | 92 ++
.../sql/catalyst/parser/ParserInterface.scala | 36 +
.../sql/catalyst/util/DataTypeParser.scala | 120 ---
.../catalyst/util/LegacyTypeStringParser.scala | 92 --
.../org/apache/spark/sql/types/StructType.scala | 3 +-
.../spark/sql/catalyst/CatalystQlSuite.scala | 210 ----
.../sql/catalyst/parser/CatalystQlSuite.scala | 211 ++++
.../catalyst/parser/DataTypeParserSuite.scala | 123 +++
.../sql/catalyst/util/DataTypeParserSuite.scala | 123 ---
.../scala/org/apache/spark/sql/Column.scala | 3 +-
.../scala/org/apache/spark/sql/SQLContext.scala | 1 +
.../apache/spark/sql/execution/SparkQl.scala | 4 +-
.../datasources/parquet/ParquetRelation.scala | 2 +-
.../scala/org/apache/spark/sql/functions.scala | 3 +-
.../spark/sql/internal/SessionState.scala | 2 +-
.../spark/sql/hive/HiveMetastoreCatalog.scala | 2 +-
.../spark/sql/hive/HiveSessionState.scala | 2 +-
23 files changed, 1751 insertions(+), 1744 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
deleted file mode 100644
index 38fa5cb..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import scala.language.implicitConversions
-import scala.util.parsing.combinator.lexical.StdLexical
-import scala.util.parsing.combinator.syntactical.StandardTokenParsers
-import scala.util.parsing.combinator.PackratParsers
-import scala.util.parsing.input.CharArrayReader.EofCh
-
-import org.apache.spark.sql.catalyst.plans.logical._
-
-private[sql] abstract class AbstractSparkSQLParser
- extends StandardTokenParsers with PackratParsers with ParserInterface {
-
- def parsePlan(input: String): LogicalPlan = synchronized {
- // Initialize the Keywords.
- initLexical
- phrase(start)(new lexical.Scanner(input)) match {
- case Success(plan, _) => plan
- case failureOrError => sys.error(failureOrError.toString)
- }
- }
- /* One-time initialization of lexical. This avoids reinitializing lexical in the parse method. */
- protected lazy val initLexical: Unit = lexical.initialize(reservedWords)
-
- protected case class Keyword(str: String) {
- def normalize: String = lexical.normalizeKeyword(str)
- def parser: Parser[String] = normalize
- }
-
- protected implicit def asParser(k: Keyword): Parser[String] = k.parser
-
- // By default, use reflection to find the reserved words defined in the subclass.
- // NOTE: since the Keyword properties are defined by the subclass, we cannot call this
- // method during parent class instantiation, because the subclass instance has not
- // been created yet.
- protected lazy val reservedWords: Seq[String] =
- this
- .getClass
- .getMethods
- .filter(_.getReturnType == classOf[Keyword])
- .map(_.invoke(this).asInstanceOf[Keyword].normalize)
-
- // Set the keywords as empty by default, will change that later.
- override val lexical = new SqlLexical
-
- protected def start: Parser[LogicalPlan]
-
- // Returns the whole input string
- protected lazy val wholeInput: Parser[String] = new Parser[String] {
- def apply(in: Input): ParseResult[String] =
- Success(in.source.toString, in.drop(in.source.length()))
- }
-
- // Returns the rest of the input string that are not parsed yet
- protected lazy val restInput: Parser[String] = new Parser[String] {
- def apply(in: Input): ParseResult[String] =
- Success(
- in.source.subSequence(in.offset, in.source.length()).toString,
- in.drop(in.source.length()))
- }
-}
-
-class SqlLexical extends StdLexical {
- case class DecimalLit(chars: String) extends Token {
- override def toString: String = chars
- }
-
- /* This is a workaround to support the lazy setting */
- def initialize(keywords: Seq[String]): Unit = {
- reserved.clear()
- reserved ++= keywords
- }
-
- /* Normalize the keyword string */
- def normalizeKeyword(str: String): String = str.toLowerCase
-
- delimiters += (
- "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
- ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
- )
-
- protected override def processIdent(name: String) = {
- val token = normalizeKeyword(name)
- if (reserved contains token) Keyword(token) else Identifier(name)
- }
-
- override lazy val token: Parser[Token] =
- ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) }
- | '.' ~> (rep1(digit) ~ scientificNotation) ^^
- { case i ~ s => DecimalLit("0." + i.mkString + s) }
- | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^
- { case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) }
- | digit.* ~ identChar ~ (identChar | digit).* ^^
- { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
- | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
- case i ~ None => NumericLit(i.mkString)
- case i ~ Some(d) => DecimalLit(i.mkString + "." + d.mkString)
- }
- | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
- { case chars => StringLit(chars mkString "") }
- | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
- { case chars => StringLit(chars mkString "") }
- | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
- { case chars => Identifier(chars mkString "") }
- | EofCh ^^^ EOF
- | '\'' ~> failure("unclosed string literal")
- | '"' ~> failure("unclosed string literal")
- | delim
- | failure("illegal character")
- )
-
- override def identChar: Parser[Elem] = letter | elem('_')
-
- private lazy val scientificNotation: Parser[String] =
- (elem('e') | elem('E')) ~> (elem('+') | elem('-')).? ~ rep1(digit) ^^ {
- case s ~ rest => "e" + s.mkString + rest.mkString
- }
-
- override def whitespace: Parser[Any] =
- ( whitespaceChar
- | '/' ~ '*' ~ comment
- | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
- | '#' ~ chrExcept(EofCh, '\n').*
- | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
- | '/' ~ '*' ~ failure("unclosed comment")
- ).*
-}
-
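Per the diffstat above, this file reappears unchanged under catalyst.parser. A sketch
of the subclassing pattern it supports (TinyParser is hypothetical; the two unused
ParserInterface methods are stubbed out):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.expressions.Expression
    import org.apache.spark.sql.catalyst.parser.AbstractSparkSQLParser
    import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}

    class TinyParser extends AbstractSparkSQLParser {
      // Keyword members are found reflectively and seed the lexer's reserved words.
      protected val SELECT = Keyword("SELECT")
      override protected def start: Parser[LogicalPlan] = SELECT ^^^ OneRowRelation
      override def parseExpression(sql: String): Expression =
        sys.error("not supported in this sketch")
      override def parseTableIdentifier(sql: String): TableIdentifier =
        sys.error("not supported in this sketch")
    }

    // new TinyParser().parsePlan("SELECT")  // keywords match case-insensitively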
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
deleted file mode 100644
index a0a56d7..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
+++ /dev/null
@@ -1,1010 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.catalyst
-
-import java.sql.Date
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.Count
-import org.apache.spark.sql.catalyst.parser._
-import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval
-import org.apache.spark.util.random.RandomSampler
-
-/**
- * This class translates SQL to Catalyst [[LogicalPlan]]s or [[Expression]]s.
- */
-private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends ParserInterface {
- object Token {
- def unapply(node: ASTNode): Some[(String, List[ASTNode])] = {
- CurrentOrigin.setPosition(node.line, node.positionInLine)
- node.pattern
- }
- }
-
- /**
- * The safeParse method allows a user to focus on the parsing/AST transformation logic. This
- * method will take care of possible errors during the parsing process.
- */
- protected def safeParse[T](sql: String, ast: ASTNode)(toResult: ASTNode => T): T = {
- try {
- toResult(ast)
- } catch {
- case e: MatchError => throw e
- case e: AnalysisException => throw e
- case e: Exception =>
- throw new AnalysisException(e.getMessage)
- case e: NotImplementedError =>
- throw new AnalysisException(
- s"""Unsupported language features in query
- |== SQL ==
- |$sql
- |== AST ==
- |${ast.treeString}
- |== Error ==
- |$e
- |== Stacktrace ==
- |${e.getStackTrace.head}
- """.stripMargin)
- }
- }
-
- /** Creates LogicalPlan for a given SQL string. */
- def parsePlan(sql: String): LogicalPlan =
- safeParse(sql, ParseDriver.parsePlan(sql, conf))(nodeToPlan)
-
- /** Creates Expression for a given SQL string. */
- def parseExpression(sql: String): Expression =
- safeParse(sql, ParseDriver.parseExpression(sql, conf))(selExprNodeToExpr(_).get)
-
- /** Creates TableIdentifier for a given SQL string. */
- def parseTableIdentifier(sql: String): TableIdentifier =
- safeParse(sql, ParseDriver.parseTableName(sql, conf))(extractTableIdent)
-
- def parseDdl(sql: String): Seq[Attribute] = {
- safeParse(sql, ParseDriver.parseExpression(sql, conf)) { ast =>
- val Token("TOK_CREATETABLE", children) = ast
- children
- .find(_.text == "TOK_TABCOLLIST")
- .getOrElse(sys.error("No columnList!"))
- .flatMap(_.children.map(nodeToAttribute))
- }
- }
-
- protected def getClauses(
- clauseNames: Seq[String],
- nodeList: Seq[ASTNode]): Seq[Option[ASTNode]] = {
- var remainingNodes = nodeList
- val clauses = clauseNames.map { clauseName =>
- val (matches, nonMatches) = remainingNodes.partition(_.text.toUpperCase == clauseName)
- remainingNodes = nonMatches ++ (if (matches.nonEmpty) matches.tail else Nil)
- matches.headOption
- }
-
- if (remainingNodes.nonEmpty) {
- sys.error(
- s"""Unhandled clauses: ${remainingNodes.map(_.treeString).mkString("\n")}.
- |You are likely trying to use an unsupported Hive feature."""".stripMargin)
- }
- clauses
- }
-
- protected def getClause(clauseName: String, nodeList: Seq[ASTNode]): ASTNode =
- getClauseOption(clauseName, nodeList).getOrElse(sys.error(
- s"Expected clause $clauseName missing from ${nodeList.map(_.treeString).mkString("\n")}"))
-
- protected def getClauseOption(clauseName: String, nodeList: Seq[ASTNode]): Option[ASTNode] = {
- nodeList.filter { case ast: ASTNode => ast.text == clauseName } match {
- case Seq(oneMatch) => Some(oneMatch)
- case Seq() => None
- case _ => sys.error(s"Found multiple instances of clause $clauseName")
- }
- }
-
- protected def nodeToAttribute(node: ASTNode): Attribute = node match {
- case Token("TOK_TABCOL", Token(colName, Nil) :: dataType :: Nil) =>
- AttributeReference(colName, nodeToDataType(dataType), nullable = true)()
- case _ =>
- noParseRule("Attribute", node)
- }
-
- protected def nodeToDataType(node: ASTNode): DataType = node match {
- case Token("TOK_DECIMAL", precision :: scale :: Nil) =>
- DecimalType(precision.text.toInt, scale.text.toInt)
- case Token("TOK_DECIMAL", precision :: Nil) =>
- DecimalType(precision.text.toInt, 0)
- case Token("TOK_DECIMAL", Nil) => DecimalType.USER_DEFAULT
- case Token("TOK_BIGINT", Nil) => LongType
- case Token("TOK_INT", Nil) => IntegerType
- case Token("TOK_TINYINT", Nil) => ByteType
- case Token("TOK_SMALLINT", Nil) => ShortType
- case Token("TOK_BOOLEAN", Nil) => BooleanType
- case Token("TOK_STRING", Nil) => StringType
- case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
- case Token("TOK_CHAR", Token(_, Nil) :: Nil) => StringType
- case Token("TOK_FLOAT", Nil) => FloatType
- case Token("TOK_DOUBLE", Nil) => DoubleType
- case Token("TOK_DATE", Nil) => DateType
- case Token("TOK_TIMESTAMP", Nil) => TimestampType
- case Token("TOK_BINARY", Nil) => BinaryType
- case Token("TOK_LIST", elementType :: Nil) => ArrayType(nodeToDataType(elementType))
- case Token("TOK_STRUCT", Token("TOK_TABCOLLIST", fields) :: Nil) =>
- StructType(fields.map(nodeToStructField))
- case Token("TOK_MAP", keyType :: valueType :: Nil) =>
- MapType(nodeToDataType(keyType), nodeToDataType(valueType))
- case _ =>
- noParseRule("DataType", node)
- }
-
- protected def nodeToStructField(node: ASTNode): StructField = node match {
- case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) =>
- StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true)
- case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: comment :: Nil) =>
- val meta = new MetadataBuilder().putString("comment", unquoteString(comment.text)).build()
- StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true, meta)
- case _ =>
- noParseRule("StructField", node)
- }
-
- protected def extractTableIdent(tableNameParts: ASTNode): TableIdentifier = {
- tableNameParts.children.map {
- case Token(part, Nil) => cleanIdentifier(part)
- } match {
- case Seq(tableOnly) => TableIdentifier(tableOnly)
- case Seq(databaseName, table) => TableIdentifier(table, Some(databaseName))
- case other => sys.error("Hive only supports table names like 'tableName' " +
- s"or 'databaseName.tableName', found '$other'")
- }
- }
-
- /**
- * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2))
- * is equivalent to
- * SELECT MAX(value) FROM src GROUP BY k1, k2 UNION SELECT MAX(value) FROM src GROUP BY k2
- * Check the following link for details.
- *
-https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C+Grouping+and+Rollup
- *
- * The bitmask denotes the validity of each grouping expression in a grouping set; the
- * bitmask is also called the grouping id (`GROUPING__ID`, the virtual column in Hive).
- * E.g. in the superset (k1, k2, k3) (bit 2: k1, bit 1: k2, and bit 0: k3), the grouping ids
- * of GROUPING SETS (k1, k2) and (k2) should be 1 and 5 respectively.
- */
- protected def extractGroupingSet(children: Seq[ASTNode]): (Seq[Expression], Seq[Int]) = {
- val (keyASTs, setASTs) = children.partition {
- case Token("TOK_GROUPING_SETS_EXPRESSION", _) => false // grouping sets
- case _ => true // grouping keys
- }
-
- val keys = keyASTs.map(nodeToExpr)
- val keyMap = keyASTs.zipWithIndex.toMap
-
- val mask = (1 << keys.length) - 1
- val bitmasks: Seq[Int] = setASTs.map {
- case Token("TOK_GROUPING_SETS_EXPRESSION", columns) =>
- columns.foldLeft(mask)((bitmap, col) => {
- val keyIndex = keyMap.find(_._1.treeEquals(col)).map(_._2).getOrElse(
- throw new AnalysisException(s"${col.treeString} doesn't show up in the GROUP BY list"))
- // 0 means that the column at the given index is a grouping column, 1 means it is not,
- // so we unset the bit in bitmap.
- bitmap & ~(1 << (keys.length - 1 - keyIndex))
- })
- case _ => sys.error("Expect GROUPING SETS clause")
- }
-
- (keys, bitmasks)
- }
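-  // Worked example (illustrative, not in the original source): with keys (k1, k2, k3),
-  // mask = (1 << 3) - 1 = 0b111 = 7. For GROUPING SETS (k1, k2) we unset bit 2 (k1) and
-  // bit 1 (k2), giving 0b001 = 1; for (k2) we unset bit 1, giving 0b101 = 5.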
-
- protected def nodeToPlan(node: ASTNode): LogicalPlan = node match {
- case Token("TOK_SHOWFUNCTIONS", args) =>
- // Skip LIKE.
- val pattern = args match {
- case like :: nodes if like.text.toUpperCase == "LIKE" => nodes
- case nodes => nodes
- }
-
- // Extract Database and Function name
- pattern match {
- case Nil =>
- ShowFunctions(None, None)
- case Token(name, Nil) :: Nil =>
- ShowFunctions(None, Some(unquoteString(cleanIdentifier(name))))
- case Token(db, Nil) :: Token(name, Nil) :: Nil =>
- ShowFunctions(Some(unquoteString(cleanIdentifier(db))),
- Some(unquoteString(cleanIdentifier(name))))
- case _ =>
- noParseRule("SHOW FUNCTIONS", node)
- }
-
- case Token("TOK_DESCFUNCTION", Token(functionName, Nil) :: isExtended) =>
- DescribeFunction(cleanIdentifier(functionName), isExtended.nonEmpty)
-
- case Token("TOK_QUERY", queryArgs @ Token("TOK_CTE" | "TOK_FROM" | "TOK_INSERT", _) :: _) =>
- val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
- queryArgs match {
- case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: inserts =>
- val cteRelations = ctes.map { node =>
- val relation = nodeToRelation(node).asInstanceOf[SubqueryAlias]
- relation.alias -> relation
- }
- (Some(from.head), inserts, Some(cteRelations.toMap))
- case Token("TOK_FROM", from) :: inserts =>
- (Some(from.head), inserts, None)
- case Token("TOK_INSERT", _) :: Nil =>
- (None, queryArgs, None)
- }
-
- // Return one query for each insert clause.
- val queries = insertClauses.map {
- case Token("TOK_INSERT", singleInsert) =>
- val (
- intoClause ::
- destClause ::
- selectClause ::
- selectDistinctClause ::
- whereClause ::
- groupByClause ::
- rollupGroupByClause ::
- cubeGroupByClause ::
- groupingSetsClause ::
- orderByClause ::
- havingClause ::
- sortByClause ::
- clusterByClause ::
- distributeByClause ::
- limitClause ::
- lateralViewClause ::
- windowClause :: Nil) = {
- getClauses(
- Seq(
- "TOK_INSERT_INTO",
- "TOK_DESTINATION",
- "TOK_SELECT",
- "TOK_SELECTDI",
- "TOK_WHERE",
- "TOK_GROUPBY",
- "TOK_ROLLUP_GROUPBY",
- "TOK_CUBE_GROUPBY",
- "TOK_GROUPING_SETS",
- "TOK_ORDERBY",
- "TOK_HAVING",
- "TOK_SORTBY",
- "TOK_CLUSTERBY",
- "TOK_DISTRIBUTEBY",
- "TOK_LIMIT",
- "TOK_LATERAL_VIEW",
- "WINDOW"),
- singleInsert)
- }
-
- val relations = fromClause match {
- case Some(f) => nodeToRelation(f)
- case None => OneRowRelation
- }
-
- val withWhere = whereClause.map { whereNode =>
- val Seq(whereExpr) = whereNode.children
- Filter(nodeToExpr(whereExpr), relations)
- }.getOrElse(relations)
-
- val select = (selectClause orElse selectDistinctClause)
- .getOrElse(sys.error("No select clause."))
-
- val transformation = nodeToTransformation(select.children.head, withWhere)
-
- val withLateralView = lateralViewClause.map { lv =>
- nodeToGenerate(lv.children.head, outer = false, withWhere)
- }.getOrElse(withWhere)
-
- // The projection of the query can either be a normal projection, an aggregation
- // (if there is a group by) or a script transformation.
- val withProject: LogicalPlan = transformation.getOrElse {
- val selectExpressions =
- select.children.flatMap(selExprNodeToExpr).map(UnresolvedAlias(_))
- Seq(
- groupByClause.map(e => e match {
- case Token("TOK_GROUPBY", children) =>
- // Not a transformation so must be either project or aggregation.
- Aggregate(children.map(nodeToExpr), selectExpressions, withLateralView)
- case _ => sys.error("Expect GROUP BY")
- }),
- groupingSetsClause.map(e => e match {
- case Token("TOK_GROUPING_SETS", children) =>
- val(groupByExprs, masks) = extractGroupingSet(children)
- GroupingSets(masks, groupByExprs, withLateralView, selectExpressions)
- case _ => sys.error("Expect GROUPING SETS")
- }),
- rollupGroupByClause.map(e => e match {
- case Token("TOK_ROLLUP_GROUPBY", children) =>
- Aggregate(
- Seq(Rollup(children.map(nodeToExpr))),
- selectExpressions,
- withLateralView)
- case _ => sys.error("Expect WITH ROLLUP")
- }),
- cubeGroupByClause.map(e => e match {
- case Token("TOK_CUBE_GROUPBY", children) =>
- Aggregate(
- Seq(Cube(children.map(nodeToExpr))),
- selectExpressions,
- withLateralView)
- case _ => sys.error("Expect WITH CUBE")
- }),
- Some(Project(selectExpressions, withLateralView))).flatten.head
- }
-
- // Handle HAVING clause.
- val withHaving = havingClause.map { h =>
- val havingExpr = h.children match { case Seq(hexpr) => nodeToExpr(hexpr) }
- // Note that we added a cast to boolean. If the expression itself is already boolean,
- // the optimizer will get rid of the unnecessary cast.
- Filter(Cast(havingExpr, BooleanType), withProject)
- }.getOrElse(withProject)
-
- // Handle SELECT DISTINCT
- val withDistinct =
- if (selectDistinctClause.isDefined) Distinct(withHaving) else withHaving
-
- // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause.
- val withSort =
- (orderByClause, sortByClause, distributeByClause, clusterByClause) match {
- case (Some(totalOrdering), None, None, None) =>
- Sort(totalOrdering.children.map(nodeToSortOrder), global = true, withDistinct)
- case (None, Some(perPartitionOrdering), None, None) =>
- Sort(
- perPartitionOrdering.children.map(nodeToSortOrder),
- global = false, withDistinct)
- case (None, None, Some(partitionExprs), None) =>
- RepartitionByExpression(
- partitionExprs.children.map(nodeToExpr), withDistinct)
- case (None, Some(perPartitionOrdering), Some(partitionExprs), None) =>
- Sort(
- perPartitionOrdering.children.map(nodeToSortOrder), global = false,
- RepartitionByExpression(
- partitionExprs.children.map(nodeToExpr),
- withDistinct))
- case (None, None, None, Some(clusterExprs)) =>
- Sort(
- clusterExprs.children.map(nodeToExpr).map(SortOrder(_, Ascending)),
- global = false,
- RepartitionByExpression(
- clusterExprs.children.map(nodeToExpr),
- withDistinct))
- case (None, None, None, None) => withDistinct
- case _ => sys.error("Unsupported set of ordering / distribution clauses.")
- }
-
- val withLimit =
- limitClause.map(l => nodeToExpr(l.children.head))
- .map(Limit(_, withSort))
- .getOrElse(withSort)
-
- // Collect all window specifications defined in the WINDOW clause.
- val windowDefinitions = windowClause.map(_.children.collect {
- case Token("TOK_WINDOWDEF",
- Token(windowName, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) =>
- windowName -> nodesToWindowSpecification(spec)
- }.toMap)
- // Handle cases like
- // window w1 as (partition by p_mfgr order by p_name
- // range between 2 preceding and 2 following),
- // w2 as w1
- val resolvedCrossReference = windowDefinitions.map {
- windowDefMap => windowDefMap.map {
- case (windowName, WindowSpecReference(other)) =>
- (windowName, windowDefMap(other).asInstanceOf[WindowSpecDefinition])
- case o => o.asInstanceOf[(String, WindowSpecDefinition)]
- }
- }
-
- val withWindowDefinitions =
- resolvedCrossReference.map(WithWindowDefinition(_, withLimit)).getOrElse(withLimit)
-
- // TOK_INSERT_INTO means to add files to the table.
- // TOK_DESTINATION means to overwrite the table.
- val resultDestination =
- (intoClause orElse destClause).getOrElse(sys.error("No destination found."))
- val overwrite = intoClause.isEmpty
- nodeToDest(
- resultDestination,
- withWindowDefinitions,
- overwrite)
- }
-
-      // If there are multiple INSERTs, just UNION them together into one query.
- val query = if (queries.length == 1) queries.head else Union(queries)
-
-      // Return a With plan if there are CTE relations.
- cteRelations.map(With(query, _)).getOrElse(query)
-
- case Token("TOK_UNIONALL", left :: right :: Nil) =>
- Union(nodeToPlan(left), nodeToPlan(right))
- case Token("TOK_UNIONDISTINCT", left :: right :: Nil) =>
- Distinct(Union(nodeToPlan(left), nodeToPlan(right)))
- case Token("TOK_EXCEPT", left :: right :: Nil) =>
- Except(nodeToPlan(left), nodeToPlan(right))
- case Token("TOK_INTERSECT", left :: right :: Nil) =>
- Intersect(nodeToPlan(left), nodeToPlan(right))
-
- case _ =>
- noParseRule("Plan", node)
- }
-
- val allJoinTokens = "(TOK_.*JOIN)".r
-  val lateralViewToken = "TOK_LATERAL_VIEW(.*)".r
- protected def nodeToRelation(node: ASTNode): LogicalPlan = {
- node match {
- case Token("TOK_SUBQUERY", query :: Token(alias, Nil) :: Nil) =>
- SubqueryAlias(cleanIdentifier(alias), nodeToPlan(query))
-
-      case Token(lateralViewToken(isOuter), selectClause :: relationClause :: Nil) =>
- nodeToGenerate(
- selectClause,
- outer = isOuter.nonEmpty,
- nodeToRelation(relationClause))
-
- /* All relations, possibly with aliases or sampling clauses. */
- case Token("TOK_TABREF", clauses) =>
-        // If the last clause is not a TOK_* token, it is the table alias.
- val (nonAliasClauses, aliasClause) =
- if (clauses.last.text.startsWith("TOK")) {
- (clauses, None)
- } else {
- (clauses.dropRight(1), Some(clauses.last))
- }
-
- val (Some(tableNameParts) ::
- splitSampleClause ::
- bucketSampleClause :: Nil) = {
- getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE", "TOK_TABLEBUCKETSAMPLE"),
- nonAliasClauses)
- }
-
- val tableIdent = extractTableIdent(tableNameParts)
- val alias = aliasClause.map { case Token(a, Nil) => cleanIdentifier(a) }
- val relation = UnresolvedRelation(tableIdent, alias)
-
- // Apply sampling if requested.
- (bucketSampleClause orElse splitSampleClause).map {
- case Token("TOK_TABLESPLITSAMPLE",
- Token("TOK_ROWCOUNT", Nil) :: Token(count, Nil) :: Nil) =>
- Limit(Literal(count.toInt), relation)
- case Token("TOK_TABLESPLITSAMPLE",
- Token("TOK_PERCENT", Nil) :: Token(fraction, Nil) :: Nil) =>
- // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
- // function takes X PERCENT as the input and the range of X is [0, 100], we need to
- // adjust the fraction.
- require(
- fraction.toDouble >= (0.0 - RandomSampler.roundingEpsilon)
- && fraction.toDouble <= (100.0 + RandomSampler.roundingEpsilon),
- s"Sampling fraction ($fraction) must be on interval [0, 100]")
- Sample(0.0, fraction.toDouble / 100, withReplacement = false,
- (math.random * 1000).toInt,
- relation)(
- isTableSample = true)
- case Token("TOK_TABLEBUCKETSAMPLE",
- Token(numerator, Nil) ::
- Token(denominator, Nil) :: Nil) =>
- val fraction = numerator.toDouble / denominator.toDouble
- Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, relation)(
- isTableSample = true)
- case a =>
- noParseRule("Sampling", a)
- }.getOrElse(relation)
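-        // Illustrative examples (not in the original source): "TABLESAMPLE(30 PERCENT)"
-        // yields Sample(0.0, 0.3, ...) over the relation, and "TABLESAMPLE(BUCKET 1 OUT OF 4)"
-        // yields a sampling fraction of 1.0 / 4 = 0.25.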
-
- case Token(allJoinTokens(joinToken), relation1 :: relation2 :: other) =>
-        if (other.size > 1) {
-          sys.error(s"Unsupported join operation: $other")
-        }
-
- val joinType = joinToken match {
- case "TOK_JOIN" => Inner
- case "TOK_CROSSJOIN" => Inner
- case "TOK_RIGHTOUTERJOIN" => RightOuter
- case "TOK_LEFTOUTERJOIN" => LeftOuter
- case "TOK_FULLOUTERJOIN" => FullOuter
- case "TOK_LEFTSEMIJOIN" => LeftSemi
- case "TOK_UNIQUEJOIN" => noParseRule("Unique Join", node)
- case "TOK_ANTIJOIN" => noParseRule("Anti Join", node)
- case "TOK_NATURALJOIN" => NaturalJoin(Inner)
- case "TOK_NATURALRIGHTOUTERJOIN" => NaturalJoin(RightOuter)
- case "TOK_NATURALLEFTOUTERJOIN" => NaturalJoin(LeftOuter)
- case "TOK_NATURALFULLOUTERJOIN" => NaturalJoin(FullOuter)
- }
- Join(nodeToRelation(relation1),
- nodeToRelation(relation2),
- joinType,
- other.headOption.map(nodeToExpr))
-
- case _ =>
- noParseRule("Relation", node)
- }
- }
-
- protected def nodeToSortOrder(node: ASTNode): SortOrder = node match {
- case Token("TOK_TABSORTCOLNAMEASC", sortExpr :: Nil) =>
- SortOrder(nodeToExpr(sortExpr), Ascending)
- case Token("TOK_TABSORTCOLNAMEDESC", sortExpr :: Nil) =>
- SortOrder(nodeToExpr(sortExpr), Descending)
- case _ =>
- noParseRule("SortOrder", node)
- }
-
- val destinationToken = "TOK_DESTINATION|TOK_INSERT_INTO".r
- protected def nodeToDest(
- node: ASTNode,
- query: LogicalPlan,
- overwrite: Boolean): LogicalPlan = node match {
- case Token(destinationToken(),
- Token("TOK_DIR",
- Token("TOK_TMP_FILE", Nil) :: Nil) :: Nil) =>
- query
-
- case Token(destinationToken(),
- Token("TOK_TAB",
- tableArgs) :: Nil) =>
- val Some(tableNameParts) :: partitionClause :: Nil =
- getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
-
- val tableIdent = extractTableIdent(tableNameParts)
-
- val partitionKeys = partitionClause.map(_.children.map {
- // Parse partitions. We also make keys case insensitive.
- case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
- cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
- case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
- cleanIdentifier(key.toLowerCase) -> None
- }.toMap).getOrElse(Map.empty)
-
- InsertIntoTable(
- UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = false)
-
- case Token(destinationToken(),
- Token("TOK_TAB",
- tableArgs) ::
- Token("TOK_IFNOTEXISTS",
- ifNotExists) :: Nil) =>
- val Some(tableNameParts) :: partitionClause :: Nil =
- getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
-
- val tableIdent = extractTableIdent(tableNameParts)
-
- val partitionKeys = partitionClause.map(_.children.map {
- // Parse partitions. We also make keys case insensitive.
- case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
- cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
- case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
- cleanIdentifier(key.toLowerCase) -> None
- }.toMap).getOrElse(Map.empty)
-
- InsertIntoTable(
- UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = true)
-
- case _ =>
- noParseRule("Destination", node)
- }
-
- protected def selExprNodeToExpr(node: ASTNode): Option[Expression] = node match {
- case Token("TOK_SELEXPR", e :: Nil) =>
- Some(nodeToExpr(e))
-
- case Token("TOK_SELEXPR", e :: Token(alias, Nil) :: Nil) =>
- Some(Alias(nodeToExpr(e), cleanIdentifier(alias))())
-
- case Token("TOK_SELEXPR", e :: aliasChildren) =>
- val aliasNames = aliasChildren.collect {
- case Token(name, Nil) => cleanIdentifier(name)
- }
- Some(MultiAlias(nodeToExpr(e), aliasNames))
-
- /* Hints are ignored */
- case Token("TOK_HINTLIST", _) => None
-
- case _ =>
- noParseRule("Select", node)
- }
-
- protected val escapedIdentifier = "`(.+)`".r
- protected val doubleQuotedString = "\"([^\"]+)\"".r
- protected val singleQuotedString = "'([^']+)'".r
-
- protected def unquoteString(str: String) = str match {
- case singleQuotedString(s) => s
- case doubleQuotedString(s) => s
- case other => other
- }
-
- /** Strips backticks from ident if present */
- protected def cleanIdentifier(ident: String): String = ident match {
- case escapedIdentifier(i) => i
- case plainIdent => plainIdent
- }
-
- /* Case insensitive matches */
- val COUNT = "(?i)COUNT".r
- val SUM = "(?i)SUM".r
- val AND = "(?i)AND".r
- val OR = "(?i)OR".r
- val NOT = "(?i)NOT".r
- val TRUE = "(?i)TRUE".r
- val FALSE = "(?i)FALSE".r
- val LIKE = "(?i)LIKE".r
- val RLIKE = "(?i)RLIKE".r
- val REGEXP = "(?i)REGEXP".r
- val IN = "(?i)IN".r
- val DIV = "(?i)DIV".r
- val BETWEEN = "(?i)BETWEEN".r
- val WHEN = "(?i)WHEN".r
- val CASE = "(?i)CASE".r
-
- val INTEGRAL = "[+-]?\\d+".r
- val DECIMAL = "[+-]?((\\d+(\\.\\d*)?)|(\\.\\d+))".r
-
- protected def nodeToExpr(node: ASTNode): Expression = node match {
- /* Attribute References */
- case Token("TOK_TABLE_OR_COL", Token(name, Nil) :: Nil) =>
- UnresolvedAttribute.quoted(cleanIdentifier(name))
- case Token(".", qualifier :: Token(attr, Nil) :: Nil) =>
- nodeToExpr(qualifier) match {
- case UnresolvedAttribute(nameParts) =>
- UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
- case other => UnresolvedExtractValue(other, Literal(cleanIdentifier(attr)))
- }
- case Token("TOK_SUBQUERY_EXPR", Token("TOK_SUBQUERY_OP", Nil) :: subquery :: Nil) =>
- ScalarSubquery(nodeToPlan(subquery))
-
- /* Stars (*) */
- case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
-    // The format dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
-    // have a single child, which is tableName.
- case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", target) :: Nil) if target.nonEmpty =>
- UnresolvedStar(Some(target.map(x => cleanIdentifier(x.text))))
-
- /* Aggregate Functions */
- case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
- Count(args.map(nodeToExpr)).toAggregateExpression(isDistinct = true)
- case Token("TOK_FUNCTIONSTAR", Token(COUNT(), Nil) :: Nil) =>
- Count(Literal(1)).toAggregateExpression()
-
- /* Casts */
- case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), StringType)
- case Token("TOK_FUNCTION", Token("TOK_VARCHAR", _) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), StringType)
- case Token("TOK_FUNCTION", Token("TOK_CHAR", _) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), StringType)
- case Token("TOK_FUNCTION", Token("TOK_INT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), IntegerType)
- case Token("TOK_FUNCTION", Token("TOK_BIGINT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), LongType)
- case Token("TOK_FUNCTION", Token("TOK_FLOAT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), FloatType)
- case Token("TOK_FUNCTION", Token("TOK_DOUBLE", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DoubleType)
- case Token("TOK_FUNCTION", Token("TOK_SMALLINT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), ShortType)
- case Token("TOK_FUNCTION", Token("TOK_TINYINT", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), ByteType)
- case Token("TOK_FUNCTION", Token("TOK_BINARY", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), BinaryType)
- case Token("TOK_FUNCTION", Token("TOK_BOOLEAN", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), BooleanType)
- case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: scale :: nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, scale.text.toInt))
- case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, 0))
- case Token("TOK_FUNCTION", Token("TOK_DECIMAL", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DecimalType.USER_DEFAULT)
- case Token("TOK_FUNCTION", Token("TOK_TIMESTAMP", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), TimestampType)
- case Token("TOK_FUNCTION", Token("TOK_DATE", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DateType)
-
- /* Arithmetic */
- case Token("+", child :: Nil) => nodeToExpr(child)
- case Token("-", child :: Nil) => UnaryMinus(nodeToExpr(child))
- case Token("~", child :: Nil) => BitwiseNot(nodeToExpr(child))
- case Token("+", left :: right:: Nil) => Add(nodeToExpr(left), nodeToExpr(right))
- case Token("-", left :: right:: Nil) => Subtract(nodeToExpr(left), nodeToExpr(right))
- case Token("*", left :: right:: Nil) => Multiply(nodeToExpr(left), nodeToExpr(right))
- case Token("/", left :: right:: Nil) => Divide(nodeToExpr(left), nodeToExpr(right))
- case Token(DIV(), left :: right:: Nil) =>
- Cast(Divide(nodeToExpr(left), nodeToExpr(right)), LongType)
- case Token("%", left :: right:: Nil) => Remainder(nodeToExpr(left), nodeToExpr(right))
- case Token("&", left :: right:: Nil) => BitwiseAnd(nodeToExpr(left), nodeToExpr(right))
- case Token("|", left :: right:: Nil) => BitwiseOr(nodeToExpr(left), nodeToExpr(right))
- case Token("^", left :: right:: Nil) => BitwiseXor(nodeToExpr(left), nodeToExpr(right))
-
- /* Comparisons */
- case Token("=", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
- case Token("==", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
- case Token("<=>", left :: right:: Nil) => EqualNullSafe(nodeToExpr(left), nodeToExpr(right))
- case Token("!=", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
- case Token("<>", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
- case Token(">", left :: right:: Nil) => GreaterThan(nodeToExpr(left), nodeToExpr(right))
- case Token(">=", left :: right:: Nil) => GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right))
- case Token("<", left :: right:: Nil) => LessThan(nodeToExpr(left), nodeToExpr(right))
- case Token("<=", left :: right:: Nil) => LessThanOrEqual(nodeToExpr(left), nodeToExpr(right))
- case Token(LIKE(), left :: right:: Nil) => Like(nodeToExpr(left), nodeToExpr(right))
- case Token(RLIKE(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
- case Token(REGEXP(), left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
- case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: Nil) =>
- IsNotNull(nodeToExpr(child))
- case Token("TOK_FUNCTION", Token("TOK_ISNULL", Nil) :: child :: Nil) =>
- IsNull(nodeToExpr(child))
- case Token("TOK_FUNCTION", Token(IN(), Nil) :: value :: list) =>
- In(nodeToExpr(value), list.map(nodeToExpr))
- case Token("TOK_FUNCTION",
- Token(BETWEEN(), Nil) ::
- kw ::
- target ::
- minValue ::
- maxValue :: Nil) =>
-
- val targetExpression = nodeToExpr(target)
- val betweenExpr =
- And(
- GreaterThanOrEqual(targetExpression, nodeToExpr(minValue)),
- LessThanOrEqual(targetExpression, nodeToExpr(maxValue)))
- kw match {
- case Token("KW_FALSE", Nil) => betweenExpr
- case Token("KW_TRUE", Nil) => Not(betweenExpr)
- }
-
- /* Boolean Logic */
- case Token(AND(), left :: right:: Nil) => And(nodeToExpr(left), nodeToExpr(right))
- case Token(OR(), left :: right:: Nil) => Or(nodeToExpr(left), nodeToExpr(right))
- case Token(NOT(), child :: Nil) => Not(nodeToExpr(child))
- case Token("!", child :: Nil) => Not(nodeToExpr(child))
-
- /* Case statements */
- case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) =>
- CaseWhen.createFromParser(branches.map(nodeToExpr))
- case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) =>
- val keyExpr = nodeToExpr(branches.head)
- CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr))
-
- /* Complex datatype manipulation */
- case Token("[", child :: ordinal :: Nil) =>
- UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal))
-
- /* Window Functions */
- case Token(text, args :+ Token("TOK_WINDOWSPEC", spec)) =>
- val function = nodeToExpr(node.copy(children = node.children.init))
- nodesToWindowSpecification(spec) match {
- case reference: WindowSpecReference =>
- UnresolvedWindowExpression(function, reference)
- case definition: WindowSpecDefinition =>
- WindowExpression(function, definition)
- }
-
- /* UDFs - Must be last otherwise will preempt built in functions */
- case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>
- UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = false)
- // Aggregate function with DISTINCT keyword.
- case Token("TOK_FUNCTIONDI", Token(name, Nil) :: args) =>
- UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = true)
- case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: args) =>
- UnresolvedFunction(name, UnresolvedStar(None) :: Nil, isDistinct = false)
-
- /* Literals */
- case Token("TOK_NULL", Nil) => Literal.create(null, NullType)
- case Token(TRUE(), Nil) => Literal.create(true, BooleanType)
- case Token(FALSE(), Nil) => Literal.create(false, BooleanType)
- case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
- Literal(strings.map(s => ParseUtils.unescapeSQLString(s.text)).mkString)
-
- case ast if ast.tokenType == SparkSqlParser.TinyintLiteral =>
- Literal.create(ast.text.substring(0, ast.text.length() - 1).toByte, ByteType)
-
- case ast if ast.tokenType == SparkSqlParser.SmallintLiteral =>
- Literal.create(ast.text.substring(0, ast.text.length() - 1).toShort, ShortType)
-
- case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
- Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)
-
- case ast if ast.tokenType == SparkSqlParser.DoubleLiteral =>
- Literal(ast.text.toDouble)
-
- case ast if ast.tokenType == SparkSqlParser.Number =>
- val text = ast.text
- text match {
- case INTEGRAL() =>
- BigDecimal(text) match {
- case v if v.isValidInt =>
- Literal(v.intValue())
- case v if v.isValidLong =>
- Literal(v.longValue())
- case v => Literal(v.underlying())
- }
- case DECIMAL(_*) =>
- Literal(BigDecimal(text).underlying())
- case _ =>
-          // Convert a decimal in scientific notation into a double.
- Literal(text.toDouble)
- }
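-    // Illustrative examples (not in the original source): "123" becomes an Int literal,
-    // "12345678901" a Long, "1.5" a BigDecimal-backed decimal, and "1e3" a Double (1000.0).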
- case ast if ast.tokenType == SparkSqlParser.StringLiteral =>
- Literal(ParseUtils.unescapeSQLString(ast.text))
-
- case ast if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
- Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
-
- case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
- Literal(CalendarInterval.fromYearMonthString(ast.children.head.text))
-
- case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
- Literal(CalendarInterval.fromDayTimeString(ast.children.head.text))
-
- case Token("TOK_INTERVAL", elements) =>
- var interval = new CalendarInterval(0, 0)
- var updated = false
- elements.foreach {
- // The interval node will always contain children for all possible time units. A child node
- // is only useful when it contains exactly one (numeric) child.
- case e @ Token(name, Token(value, Nil) :: Nil) =>
- val unit = name match {
- case "TOK_INTERVAL_YEAR_LITERAL" => "year"
- case "TOK_INTERVAL_MONTH_LITERAL" => "month"
- case "TOK_INTERVAL_WEEK_LITERAL" => "week"
- case "TOK_INTERVAL_DAY_LITERAL" => "day"
- case "TOK_INTERVAL_HOUR_LITERAL" => "hour"
- case "TOK_INTERVAL_MINUTE_LITERAL" => "minute"
- case "TOK_INTERVAL_SECOND_LITERAL" => "second"
- case "TOK_INTERVAL_MILLISECOND_LITERAL" => "millisecond"
- case "TOK_INTERVAL_MICROSECOND_LITERAL" => "microsecond"
- case _ => noParseRule(s"Interval($name)", e)
- }
- interval = interval.add(CalendarInterval.fromSingleUnitString(unit, value))
- updated = true
- case _ =>
- }
- if (!updated) {
- throw new AnalysisException("at least one time unit should be given for interval literal")
- }
- Literal(interval)
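-      // Illustrative example (not in the original source): "INTERVAL 1 DAY 2 HOUR" arrives
-      // as a TOK_INTERVAL whose day and hour children each carry one numeric child; the
-      // pieces are summed into a single CalendarInterval literal.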
-
- case _ =>
- noParseRule("Expression", node)
- }
-
- /* Case insensitive matches for Window Specification */
- val PRECEDING = "(?i)preceding".r
- val FOLLOWING = "(?i)following".r
- val CURRENT = "(?i)current".r
- protected def nodesToWindowSpecification(nodes: Seq[ASTNode]): WindowSpec = nodes match {
- case Token(windowName, Nil) :: Nil =>
- // Refer to a window spec defined in the window clause.
- WindowSpecReference(windowName)
- case Nil =>
- // OVER()
- WindowSpecDefinition(
- partitionSpec = Nil,
- orderSpec = Nil,
- frameSpecification = UnspecifiedFrame)
- case spec =>
- val (partitionClause :: rowFrame :: rangeFrame :: Nil) =
- getClauses(
- Seq(
- "TOK_PARTITIONINGSPEC",
- "TOK_WINDOWRANGE",
- "TOK_WINDOWVALUES"),
- spec)
-
- // Handle Partition By and Order By.
- val (partitionSpec, orderSpec) = partitionClause.map { partitionAndOrdering =>
- val (partitionByClause :: orderByClause :: sortByClause :: clusterByClause :: Nil) =
- getClauses(
- Seq("TOK_DISTRIBUTEBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_CLUSTERBY"),
- partitionAndOrdering.children)
-
- (partitionByClause, orderByClause.orElse(sortByClause), clusterByClause) match {
- case (Some(partitionByExpr), Some(orderByExpr), None) =>
- (partitionByExpr.children.map(nodeToExpr),
- orderByExpr.children.map(nodeToSortOrder))
- case (Some(partitionByExpr), None, None) =>
- (partitionByExpr.children.map(nodeToExpr), Nil)
- case (None, Some(orderByExpr), None) =>
- (Nil, orderByExpr.children.map(nodeToSortOrder))
- case (None, None, Some(clusterByExpr)) =>
- val expressions = clusterByExpr.children.map(nodeToExpr)
- (expressions, expressions.map(SortOrder(_, Ascending)))
- case _ =>
- noParseRule("Partition & Ordering", partitionAndOrdering)
- }
- }.getOrElse {
- (Nil, Nil)
- }
-
- // Handle Window Frame
- val windowFrame =
- if (rowFrame.isEmpty && rangeFrame.isEmpty) {
- UnspecifiedFrame
- } else {
- val frameType = rowFrame.map(_ => RowFrame).getOrElse(RangeFrame)
- def nodeToBoundary(node: ASTNode): FrameBoundary = node match {
- case Token(PRECEDING(), Token(count, Nil) :: Nil) =>
- if (count.toLowerCase() == "unbounded") {
- UnboundedPreceding
- } else {
- ValuePreceding(count.toInt)
- }
- case Token(FOLLOWING(), Token(count, Nil) :: Nil) =>
- if (count.toLowerCase() == "unbounded") {
- UnboundedFollowing
- } else {
- ValueFollowing(count.toInt)
- }
- case Token(CURRENT(), Nil) => CurrentRow
- case _ =>
- noParseRule("Window Frame Boundary", node)
- }
-
- rowFrame.orElse(rangeFrame).map { frame =>
- frame.children match {
- case precedingNode :: followingNode :: Nil =>
- SpecifiedWindowFrame(
- frameType,
- nodeToBoundary(precedingNode),
- nodeToBoundary(followingNode))
- case precedingNode :: Nil =>
- SpecifiedWindowFrame(frameType, nodeToBoundary(precedingNode), CurrentRow)
- case _ =>
- noParseRule("Window Frame", frame)
- }
-        }.getOrElse(sys.error("If you see this, please file a bug report with your query."))
- }
-
- WindowSpecDefinition(partitionSpec, orderSpec, windowFrame)
- }
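-  // Illustrative example (not in the original source): an OVER clause like
-  // "(PARTITION BY a ORDER BY b ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)" resolves to a
-  // WindowSpecDefinition whose frame is SpecifiedWindowFrame(RowFrame, ValuePreceding(1), CurrentRow).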
-
- protected def nodeToTransformation(
- node: ASTNode,
- child: LogicalPlan): Option[ScriptTransformation] = None
-
- val explode = "(?i)explode".r
- val jsonTuple = "(?i)json_tuple".r
- protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: LogicalPlan): Generate = {
- val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node
-
- val alias = cleanIdentifier(getClause("TOK_TABALIAS", clauses).children.head.text)
-
- val generator = clauses.head match {
- case Token("TOK_FUNCTION", Token(explode(), Nil) :: childNode :: Nil) =>
- Explode(nodeToExpr(childNode))
- case Token("TOK_FUNCTION", Token(jsonTuple(), Nil) :: children) =>
- JsonTuple(children.map(nodeToExpr))
- case other =>
- nodeToGenerator(other)
- }
-
- val attributes = clauses.collect {
- case Token(a, Nil) => UnresolvedAttribute(a.toLowerCase)
- }
-
- Generate(generator, join = true, outer = outer, Some(alias.toLowerCase), attributes, child)
- }
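-  // Illustrative example (not in the original source): "LATERAL VIEW explode(cols) t AS col"
-  // becomes Generate(Explode('cols), join = true, outer = false, Some("t"), Seq('col), child).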
-
- protected def nodeToGenerator(node: ASTNode): Generator = noParseRule("Generator", node)
-
- protected def noParseRule(msg: String, node: ASTNode): Nothing = throw new NotImplementedError(
- s"[$msg]: No parse rules for ASTNode type: ${node.tokenType}, tree:\n${node.treeString}")
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserInterface.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserInterface.scala
deleted file mode 100644
index 24ec452..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserInterface.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-
-/**
- * Interface for a parser.
- */
-trait ParserInterface {
- /** Creates LogicalPlan for a given SQL string. */
- def parsePlan(sqlText: String): LogicalPlan
-
- /** Creates Expression for a given SQL string. */
- def parseExpression(sqlText: String): Expression
-
- /** Creates TableIdentifier for a given SQL string. */
- def parseTableIdentifier(sqlText: String): TableIdentifier
-}
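For orientation, a minimal sketch of how this trait is consumed (hypothetical driver
code; `myParser` stands in for any concrete ParserInterface implementation and is not
part of this commit):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.expressions.Expression
    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    // Each method turns a SQL fragment into the corresponding catalyst object.
    val plan: LogicalPlan = myParser.parsePlan("SELECT * FROM db.tbl")
    val expr: Expression = myParser.parseExpression("a + 1")
    val table: TableIdentifier = myParser.parseTableIdentifier("db.tbl")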
http://git-wip-us.apache.org/repos/asf/spark/blob/b7d41474/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala
new file mode 100644
index 0000000..7b456a6
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSparkSQLParser.scala
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import scala.language.implicitConversions
+import scala.util.parsing.combinator.lexical.StdLexical
+import scala.util.parsing.combinator.syntactical.StandardTokenParsers
+import scala.util.parsing.combinator.PackratParsers
+import scala.util.parsing.input.CharArrayReader.EofCh
+
+import org.apache.spark.sql.catalyst.plans.logical._
+
+private[sql] abstract class AbstractSparkSQLParser
+ extends StandardTokenParsers with PackratParsers with ParserInterface {
+
+ def parsePlan(input: String): LogicalPlan = synchronized {
+ // Initialize the Keywords.
+ initLexical
+ phrase(start)(new lexical.Scanner(input)) match {
+ case Success(plan, _) => plan
+ case failureOrError => sys.error(failureOrError.toString)
+ }
+ }
+  /* One-time initialization of the lexical. This avoids reinitializing it on every parse call. */
+ protected lazy val initLexical: Unit = lexical.initialize(reservedWords)
+
+ protected case class Keyword(str: String) {
+ def normalize: String = lexical.normalizeKeyword(str)
+ def parser: Parser[String] = normalize
+ }
+
+ protected implicit def asParser(k: Keyword): Parser[String] = k.parser
+
+  // By default, use reflection to find the reserved words defined in the subclass.
+  // NOTE: since the Keyword properties are defined by the subclass, we cannot call this
+  // method while the parent class is being instantiated, because the subclass instance
+  // has not been created yet.
+ protected lazy val reservedWords: Seq[String] =
+ this
+ .getClass
+ .getMethods
+ .filter(_.getReturnType == classOf[Keyword])
+ .map(_.invoke(this).asInstanceOf[Keyword].normalize)
+
+  // The keyword set starts out empty; it is populated later via initLexical.
+ override val lexical = new SqlLexical
+
+ protected def start: Parser[LogicalPlan]
+
+ // Returns the whole input string
+ protected lazy val wholeInput: Parser[String] = new Parser[String] {
+ def apply(in: Input): ParseResult[String] =
+ Success(in.source.toString, in.drop(in.source.length()))
+ }
+
+  // Returns the rest of the input string that has not been parsed yet
+ protected lazy val restInput: Parser[String] = new Parser[String] {
+ def apply(in: Input): ParseResult[String] =
+ Success(
+ in.source.subSequence(in.offset, in.source.length()).toString,
+ in.drop(in.source.length()))
+ }
+}
+
+class SqlLexical extends StdLexical {
+ case class DecimalLit(chars: String) extends Token {
+ override def toString: String = chars
+ }
+
+  /* This is a workaround to support lazy initialization of the keyword set */
+ def initialize(keywords: Seq[String]): Unit = {
+ reserved.clear()
+ reserved ++= keywords
+ }
+
+  /* Normalize the keyword string */
+ def normalizeKeyword(str: String): String = str.toLowerCase
+
+ delimiters += (
+ "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
+ ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
+ )
+
+ protected override def processIdent(name: String) = {
+ val token = normalizeKeyword(name)
+ if (reserved contains token) Keyword(token) else Identifier(name)
+ }
+
+ override lazy val token: Parser[Token] =
+ ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) }
+ | '.' ~> (rep1(digit) ~ scientificNotation) ^^
+ { case i ~ s => DecimalLit("0." + i.mkString + s) }
+ | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^
+ { case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) }
+ | digit.* ~ identChar ~ (identChar | digit).* ^^
+ { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
+ | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
+ case i ~ None => NumericLit(i.mkString)
+ case i ~ Some(d) => DecimalLit(i.mkString + "." + d.mkString)
+ }
+ | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
+ { case chars => StringLit(chars mkString "") }
+ | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
+ { case chars => StringLit(chars mkString "") }
+ | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
+ { case chars => Identifier(chars mkString "") }
+ | EofCh ^^^ EOF
+ | '\'' ~> failure("unclosed string literal")
+ | '"' ~> failure("unclosed string literal")
+ | delim
+ | failure("illegal character")
+ )
+
+ override def identChar: Parser[Elem] = letter | elem('_')
+
+ private lazy val scientificNotation: Parser[String] =
+ (elem('e') | elem('E')) ~> (elem('+') | elem('-')).? ~ rep1(digit) ^^ {
+ case s ~ rest => "e" + s.mkString + rest.mkString
+ }
+
+ override def whitespace: Parser[Any] =
+ ( whitespaceChar
+ | '/' ~ '*' ~ comment
+ | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
+ | '#' ~ chrExcept(EofCh, '\n').*
+ | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
+ | '/' ~ '*' ~ failure("unclosed comment")
+ ).*
+}
+
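For context, a minimal sketch (hypothetical, not part of this commit) of how a concrete
subclass plugs into AbstractSparkSQLParser: Keyword vals are discovered via reflection
(reservedWords) and registered with the lexer on the first parsePlan call (initLexical),
while `start` supplies the grammar. Select1Parser, its one-rule grammar, and the stub
methods below are invented purely for illustration:

    package org.apache.spark.sql.catalyst.parser

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.expressions.Expression
    import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}

    private[sql] object Select1Parser extends AbstractSparkSQLParser {
      protected val SELECT = Keyword("SELECT")  // picked up by reservedWords via reflection

      // Accept "SELECT <number>" and ignore the literal; OneRowRelation is just a
      // convenient placeholder plan for this sketch.
      protected lazy val start: Parser[LogicalPlan] =
        SELECT ~> numericLit ^^ { _ => OneRowRelation }

      // Stubs: this sketch only supports whole plans.
      def parseExpression(sqlText: String): Expression =
        throw new UnsupportedOperationException("not supported in this sketch")
      def parseTableIdentifier(sqlText: String): TableIdentifier =
        throw new UnsupportedOperationException("not supported in this sketch")
    }

    // Select1Parser.parsePlan("select 1") returns OneRowRelation; matching is
    // case-insensitive because processIdent lowercases identifiers before the
    // reserved-word check.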