You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2015/07/09 19:01:05 UTC
spark git commit: [SPARK-8948][SQL] Remove ExtractValueWithOrdinal
abstract class
Repository: spark
Updated Branches:
refs/heads/master 59cc38944 -> e204d22bb
[SPARK-8948][SQL] Remove ExtractValueWithOrdinal abstract class
Also added more documentation for the file.
Author: Reynold Xin <rx...@databricks.com>
Closes #7316 from rxin/extract-value and squashes the following commits:
069cb7e [Reynold Xin] Removed ExtractValueWithOrdinal.
621b705 [Reynold Xin] Reverted a line.
11ebd6c [Reynold Xin] [Minor][SQL] Improve documentation for complex type extractors.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e204d22b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e204d22b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e204d22b
Branch: refs/heads/master
Commit: e204d22bb70f28b1cc090ab60f12078479be4ae0
Parents: 59cc389
Author: Reynold Xin <rx...@databricks.com>
Authored: Thu Jul 9 10:01:01 2015 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Thu Jul 9 10:01:01 2015 -0700
----------------------------------------------------------------------
.../sql/catalyst/expressions/ExtractValue.scala | 253 ------------------
.../expressions/complexTypeExtractors.scala | 267 +++++++++++++++++++
2 files changed, 267 insertions(+), 253 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/e204d22b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala
deleted file mode 100644
index 2b25ba0..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.expressions
-
-import scala.collection.Map
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions.codegen.{GeneratedExpressionCode, CodeGenContext}
-import org.apache.spark.sql.types._
-
-
-object ExtractValue {
- /**
- * Returns the resolved `ExtractValue`. It will return one kind of concrete `ExtractValue`,
- * depend on the type of `child` and `extraction`.
- *
- * `child` | `extraction` | concrete `ExtractValue`
- * ----------------------------------------------------------------
- * Struct | Literal String | GetStructField
- * Array[Struct] | Literal String | GetArrayStructFields
- * Array | Integral type | GetArrayItem
- * Map | Any type | GetMapValue
- */
- def apply(
- child: Expression,
- extraction: Expression,
- resolver: Resolver): Expression = {
-
- (child.dataType, extraction) match {
- case (StructType(fields), NonNullLiteral(v, StringType)) =>
- val fieldName = v.toString
- val ordinal = findField(fields, fieldName, resolver)
- GetStructField(child, fields(ordinal).copy(name = fieldName), ordinal)
-
- case (ArrayType(StructType(fields), containsNull), NonNullLiteral(v, StringType)) =>
- val fieldName = v.toString
- val ordinal = findField(fields, fieldName, resolver)
- GetArrayStructFields(child, fields(ordinal).copy(name = fieldName), ordinal, containsNull)
-
- case (_: ArrayType, _) if extraction.dataType.isInstanceOf[IntegralType] =>
- GetArrayItem(child, extraction)
-
- case (_: MapType, _) =>
- GetMapValue(child, extraction)
-
- case (otherType, _) =>
- val errorMsg = otherType match {
- case StructType(_) | ArrayType(StructType(_), _) =>
- s"Field name should be String Literal, but it's $extraction"
- case _: ArrayType =>
- s"Array index should be integral type, but it's ${extraction.dataType}"
- case other =>
- s"Can't extract value from $child"
- }
- throw new AnalysisException(errorMsg)
- }
- }
-
- def unapply(g: ExtractValue): Option[(Expression, Expression)] = {
- g match {
- case o: ExtractValueWithOrdinal => Some((o.child, o.ordinal))
- case s: ExtractValueWithStruct => Some((s.child, null))
- }
- }
-
- /**
- * Find the ordinal of StructField, report error if no desired field or over one
- * desired fields are found.
- */
- private def findField(fields: Array[StructField], fieldName: String, resolver: Resolver): Int = {
- val checkField = (f: StructField) => resolver(f.name, fieldName)
- val ordinal = fields.indexWhere(checkField)
- if (ordinal == -1) {
- throw new AnalysisException(
- s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}")
- } else if (fields.indexWhere(checkField, ordinal + 1) != -1) {
- throw new AnalysisException(
- s"Ambiguous reference to fields ${fields.filter(checkField).mkString(", ")}")
- } else {
- ordinal
- }
- }
-}
-
-/**
- * A common interface of all kinds of extract value expressions.
- * Note: concrete extract value expressions are created only by `ExtractValue.apply`,
- * we don't need to do type check for them.
- */
-trait ExtractValue {
- self: Expression =>
-}
-
-abstract class ExtractValueWithStruct extends UnaryExpression with ExtractValue {
- self: Product =>
-
- def field: StructField
- override def toString: String = s"$child.${field.name}"
-}
-
-/**
- * Returns the value of fields in the Struct `child`.
- */
-case class GetStructField(child: Expression, field: StructField, ordinal: Int)
- extends ExtractValueWithStruct {
-
- override def dataType: DataType = field.dataType
- override def nullable: Boolean = child.nullable || field.nullable
-
- protected override def nullSafeEval(input: Any): Any =
- input.asInstanceOf[InternalRow](ordinal)
-
- override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
- nullSafeCodeGen(ctx, ev, eval => {
- s"""
- if ($eval.isNullAt($ordinal)) {
- ${ev.isNull} = true;
- } else {
- ${ev.primitive} = ${ctx.getColumn(eval, dataType, ordinal)};
- }
- """
- })
- }
-}
-
-/**
- * Returns the array of value of fields in the Array of Struct `child`.
- */
-case class GetArrayStructFields(
- child: Expression,
- field: StructField,
- ordinal: Int,
- containsNull: Boolean) extends ExtractValueWithStruct {
-
- override def dataType: DataType = ArrayType(field.dataType, containsNull)
- override def nullable: Boolean = child.nullable || containsNull || field.nullable
-
- protected override def nullSafeEval(input: Any): Any = {
- input.asInstanceOf[Seq[InternalRow]].map { row =>
- if (row == null) null else row(ordinal)
- }
- }
-
- override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
- val arraySeqClass = "scala.collection.mutable.ArraySeq"
- // TODO: consider using Array[_] for ArrayType child to avoid
- // boxing of primitives
- nullSafeCodeGen(ctx, ev, eval => {
- s"""
- final int n = $eval.size();
- final $arraySeqClass<Object> values = new $arraySeqClass<Object>(n);
- for (int j = 0; j < n; j++) {
- InternalRow row = (InternalRow) $eval.apply(j);
- if (row != null && !row.isNullAt($ordinal)) {
- values.update(j, ${ctx.getColumn("row", field.dataType, ordinal)});
- }
- }
- ${ev.primitive} = (${ctx.javaType(dataType)}) values;
- """
- })
- }
-}
-
-abstract class ExtractValueWithOrdinal extends BinaryExpression with ExtractValue {
- self: Product =>
-
- def ordinal: Expression
- def child: Expression
-
- override def left: Expression = child
- override def right: Expression = ordinal
-
- /** `Null` is returned for invalid ordinals. */
- override def nullable: Boolean = true
- override def toString: String = s"$child[$ordinal]"
-}
-
-/**
- * Returns the field at `ordinal` in the Array `child`
- */
-case class GetArrayItem(child: Expression, ordinal: Expression)
- extends ExtractValueWithOrdinal {
-
- override def dataType: DataType = child.dataType.asInstanceOf[ArrayType].elementType
-
- protected override def nullSafeEval(value: Any, ordinal: Any): Any = {
- // TODO: consider using Array[_] for ArrayType child to avoid
- // boxing of primitives
- val baseValue = value.asInstanceOf[Seq[_]]
- val index = ordinal.asInstanceOf[Number].intValue()
- if (index >= baseValue.size || index < 0) {
- null
- } else {
- baseValue(index)
- }
- }
-
- override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
- nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
- s"""
- final int index = (int)$eval2;
- if (index >= $eval1.size() || index < 0) {
- ${ev.isNull} = true;
- } else {
- ${ev.primitive} = (${ctx.boxedType(dataType)})$eval1.apply(index);
- }
- """
- })
- }
-}
-
-/**
- * Returns the value of key `ordinal` in Map `child`
- */
-case class GetMapValue(child: Expression, ordinal: Expression)
- extends ExtractValueWithOrdinal {
-
- override def dataType: DataType = child.dataType.asInstanceOf[MapType].valueType
-
- protected override def nullSafeEval(value: Any, ordinal: Any): Any = {
- val baseValue = value.asInstanceOf[Map[Any, _]]
- baseValue.get(ordinal).orNull
- }
-
- override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
- nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
- s"""
- if ($eval1.contains($eval2)) {
- ${ev.primitive} = (${ctx.boxedType(dataType)})$eval1.apply($eval2);
- } else {
- ${ev.isNull} = true;
- }
- """
- })
- }
-}
http://git-wip-us.apache.org/repos/asf/spark/blob/e204d22b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
new file mode 100644
index 0000000..73cc930
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import scala.collection.Map
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions.codegen.{GeneratedExpressionCode, CodeGenContext}
+import org.apache.spark.sql.types._
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// This file defines all the expressions to extract values out of complex types.
+// For example, getting a field out of an array, map, or struct.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+object ExtractValue {
+  /**
+   * Returns the resolved `ExtractValue`. The concrete expression that is returned depends on
+   * the data type of `child` and on `extraction`:
+   *
+   *   `child`       | `extraction`    | concrete `ExtractValue`
+   * ----------------------------------------------------------------
+   *   Struct        | Literal String  | GetStructField
+   *   Array[Struct] | Literal String  | GetArrayStructFields
+   *   Array         | Integral type   | GetArrayItem
+   *   Map           | Any type        | GetMapValue
+   *
+   * An `AnalysisException` is thrown when none of the combinations above applies, or when the
+   * requested struct field is missing or ambiguous under `resolver`.
+   */
+  def apply(
+      child: Expression,
+      extraction: Expression,
+      resolver: Resolver): Expression = {
+
+    (child.dataType, extraction) match {
+      case (StructType(fields), NonNullLiteral(v, StringType)) =>
+        val fieldName = v.toString
+        val ordinal = findField(fields, fieldName, resolver)
+        // The field keeps the name the user wrote, not the name stored in the schema.
+        GetStructField(child, fields(ordinal).copy(name = fieldName), ordinal)
+
+      case (ArrayType(StructType(fields), containsNull), NonNullLiteral(v, StringType)) =>
+        val fieldName = v.toString
+        val ordinal = findField(fields, fieldName, resolver)
+        GetArrayStructFields(child, fields(ordinal).copy(name = fieldName), ordinal, containsNull)
+
+      case (_: ArrayType, _) if extraction.dataType.isInstanceOf[IntegralType] =>
+        GetArrayItem(child, extraction)
+
+      case (_: MapType, _) =>
+        GetMapValue(child, extraction)
+
+      case (otherType, _) =>
+        // Nothing matched: pick the message that best explains why, based on the child type.
+        val errorMsg = otherType match {
+          case StructType(_) | ArrayType(StructType(_), _) =>
+            s"Field name should be String Literal, but it's $extraction"
+          case _: ArrayType =>
+            s"Array index should be integral type, but it's ${extraction.dataType}"
+          case _ =>
+            s"Can't extract value from $child"
+        }
+        throw new AnalysisException(errorMsg)
+    }
+  }
+
+  /**
+   * Deconstructs an `ExtractValue` into `(child, extraction)`.
+   *
+   * For struct-based extractors ([[ExtractValueWithStruct]]) the second element is `null`,
+   * because the field is not held as an expression. Any other implementation of the trait
+   * does not match (returns `None`) instead of failing with a `MatchError`.
+   */
+  def unapply(g: ExtractValue): Option[(Expression, Expression)] = g match {
+    case o: GetArrayItem => Some((o.child, o.ordinal))
+    case o: GetMapValue => Some((o.child, o.key))
+    case s: ExtractValueWithStruct => Some((s.child, null))
+    case _ => None
+  }
+
+  /**
+   * Finds the ordinal of the struct field named `fieldName` (compared using `resolver`).
+   * Throws an `AnalysisException` if no field matches or if more than one field matches.
+   */
+  private def findField(fields: Array[StructField], fieldName: String, resolver: Resolver): Int = {
+    val checkField = (f: StructField) => resolver(f.name, fieldName)
+    val ordinal = fields.indexWhere(checkField)
+    if (ordinal == -1) {
+      throw new AnalysisException(
+        s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}")
+    } else if (fields.indexWhere(checkField, ordinal + 1) != -1) {
+      // A second hit after the first one means the reference is ambiguous.
+      throw new AnalysisException(
+        s"Ambiguous reference to fields ${fields.filter(checkField).mkString(", ")}")
+    } else {
+      ordinal
+    }
+  }
+}
+
+/**
+ * A common interface of all kinds of extract value expressions.
+ *
+ * Note: concrete extract value expressions are created only by `ExtractValue.apply`,
+ * so they do not need to do type checking themselves.
+ *
+ * The self-type below restricts this trait to being mixed into an `Expression`.
+ */
+trait ExtractValue {
+ self: Expression =>
+}
+
+/**
+ * Base class for the unary extractors that select a struct field
+ * (the field is described by a `StructField` rather than by an expression).
+ */
+abstract class ExtractValueWithStruct extends UnaryExpression with ExtractValue {
+  self: Product =>
+
+  /** The struct field that this expression extracts. */
+  def field: StructField
+
+  /** Renders as `child.fieldName`. */
+  override def toString: String = {
+    val fieldName = field.name
+    s"$child.$fieldName"
+  }
+}
+
+/**
+ * Returns the value of the field at position `ordinal` in the Struct `child`.
+ *
+ * The result type is the type of `field`, and the result is nullable when either
+ * `child` or `field` is nullable.
+ *
+ * No need to do type checking since it is handled by [[ExtractValue]].
+ */
+case class GetStructField(child: Expression, field: StructField, ordinal: Int)
+ extends ExtractValueWithStruct {
+
+ override def dataType: DataType = field.dataType
+ override def nullable: Boolean = child.nullable || field.nullable
+
+ /** Treats `input` as an `InternalRow` and reads the column at `ordinal`. */
+ protected override def nullSafeEval(input: Any): Any =
+ input.asInstanceOf[InternalRow](ordinal)
+
+ /**
+ * Generated code marks the result as null when the column at `ordinal` is null,
+ * otherwise it reads the column through `ctx.getColumn`.
+ */
+ override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+ nullSafeCodeGen(ctx, ev, eval => {
+ s"""
+ if ($eval.isNullAt($ordinal)) {
+ ${ev.isNull} = true;
+ } else {
+ ${ev.primitive} = ${ctx.getColumn(eval, dataType, ordinal)};
+ }
+ """
+ })
+ }
+}
+
+/**
+ * Returns an array holding, for every struct in the Array of Struct `child`,
+ * the value of the field at position `ordinal`.
+ *
+ * A null struct in the input produces a null element in the output.
+ *
+ * NOTE(review): an element is also null when the field itself is null, but `dataType`
+ * only uses `containsNull` for the element nullability. Confirm that callers pass a
+ * `containsNull` that accounts for `field.nullable`.
+ *
+ * No need to do type checking since it is handled by [[ExtractValue]].
+ */
+case class GetArrayStructFields(
+ child: Expression,
+ field: StructField,
+ ordinal: Int,
+ containsNull: Boolean) extends ExtractValueWithStruct {
+
+ override def dataType: DataType = ArrayType(field.dataType, containsNull)
+ override def nullable: Boolean = child.nullable || containsNull || field.nullable
+
+ /** Treats `input` as a sequence of rows and maps each row to its column at `ordinal`. */
+ protected override def nullSafeEval(input: Any): Any = {
+ input.asInstanceOf[Seq[InternalRow]].map { row =>
+ if (row == null) null else row(ordinal)
+ }
+ }
+
+ /**
+ * Generated code allocates an `ArraySeq` of the input size and only updates the slots
+ * whose row is non-null and whose column at `ordinal` is non-null; other slots are
+ * never written.
+ */
+ override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+ val arraySeqClass = "scala.collection.mutable.ArraySeq"
+ // TODO: consider using Array[_] for ArrayType child to avoid
+ // boxing of primitives
+ nullSafeCodeGen(ctx, ev, eval => {
+ s"""
+ final int n = $eval.size();
+ final $arraySeqClass<Object> values = new $arraySeqClass<Object>(n);
+ for (int j = 0; j < n; j++) {
+ InternalRow row = (InternalRow) $eval.apply(j);
+ if (row != null && !row.isNullAt($ordinal)) {
+ values.update(j, ${ctx.getColumn("row", field.dataType, ordinal)});
+ }
+ }
+ ${ev.primitive} = (${ctx.javaType(dataType)}) values;
+ """
+ })
+ }
+}
+
+/**
+ * Returns the element at index `ordinal` in the Array `child`.
+ *
+ * An index that is negative or not smaller than the array size yields null
+ * instead of raising an error.
+ *
+ * No need to do type checking since it is handled by [[ExtractValue]].
+ */
+case class GetArrayItem(child: Expression, ordinal: Expression)
+ extends BinaryExpression with ExtractValue {
+
+ override def toString: String = s"$child[$ordinal]"
+
+ override def left: Expression = child
+ override def right: Expression = ordinal
+
+ /** `Null` is returned for invalid ordinals. */
+ override def nullable: Boolean = true
+
+ /** The element type of the child's `ArrayType`. */
+ override def dataType: DataType = child.dataType.asInstanceOf[ArrayType].elementType
+
+ /**
+ * Treats `value` as a `Seq` and `ordinal` as a `Number` (converted with `intValue()`),
+ * returning null when the index is out of range.
+ */
+ protected override def nullSafeEval(value: Any, ordinal: Any): Any = {
+ // TODO: consider using Array[_] for ArrayType child to avoid
+ // boxing of primitives
+ val baseValue = value.asInstanceOf[Seq[_]]
+ val index = ordinal.asInstanceOf[Number].intValue()
+ if (index >= baseValue.size || index < 0) {
+ null
+ } else {
+ baseValue(index)
+ }
+ }
+
+ /** Generated code applies the same bounds check as `nullSafeEval`. */
+ override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+ nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
+ s"""
+ final int index = (int)$eval2;
+ if (index >= $eval1.size() || index < 0) {
+ ${ev.isNull} = true;
+ } else {
+ ${ev.primitive} = (${ctx.boxedType(dataType)})$eval1.apply(index);
+ }
+ """
+ })
+ }
+}
+
+/**
+ * Returns the value stored under `key` in the Map `child`.
+ *
+ * A key that is not present in the map yields null.
+ *
+ * No need to do type checking since it is handled by [[ExtractValue]].
+ */
+case class GetMapValue(child: Expression, key: Expression)
+ extends BinaryExpression with ExtractValue {
+
+ override def toString: String = s"$child[$key]"
+
+ override def left: Expression = child
+ override def right: Expression = key
+
+ /** `Null` is returned when the key is not present in the map. */
+ override def nullable: Boolean = true
+
+ /** The value type of the child's `MapType`. */
+ override def dataType: DataType = child.dataType.asInstanceOf[MapType].valueType
+
+ /**
+ * Treats `value` as a `Map` and looks `ordinal` up in it, returning null on a miss.
+ * The parameter named `ordinal` is presumably the evaluated `key` expression (the
+ * right child) - confirm against `BinaryExpression`.
+ */
+ protected override def nullSafeEval(value: Any, ordinal: Any): Any = {
+ val baseValue = value.asInstanceOf[Map[Any, _]]
+ baseValue.get(ordinal).orNull
+ }
+
+ /** Generated code checks `contains` first and marks the result null on a miss. */
+ override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+ nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
+ s"""
+ if ($eval1.contains($eval2)) {
+ ${ev.primitive} = (${ctx.boxedType(dataType)})$eval1.apply($eval2);
+ } else {
+ ${ev.isNull} = true;
+ }
+ """
+ })
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org