Posted to commits@spark.apache.org by rx...@apache.org on 2015/07/09 19:01:05 UTC

spark git commit: [SPARK-8948][SQL] Remove ExtractValueWithOrdinal abstract class

Repository: spark
Updated Branches:
  refs/heads/master 59cc38944 -> e204d22bb


[SPARK-8948][SQL] Remove ExtractValueWithOrdinal abstract class

Also added more documentation for the file.
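
In short, the change collapses the ExtractValueWithOrdinal layer: GetArrayItem and GetMapValue now extend BinaryExpression with ExtractValue directly, and GetMapValue's second argument is renamed from `ordinal` to `key`. A minimal sketch of the new shape, using toy stand-ins for the Catalyst base classes rather than the real definitions (the actual code is in the diff below):

// Toy stand-ins so the sketch is self-contained; the real Expression and
// BinaryExpression classes live in org.apache.spark.sql.catalyst.expressions.
trait Expression
trait BinaryExpression extends Expression {
  def left: Expression
  def right: Expression
}
trait ExtractValue { self: Expression => }

// After this commit there is no intermediate ExtractValueWithOrdinal class:
// each ordinal-based extractor mixes in ExtractValue directly.
case class GetArrayItem(child: Expression, ordinal: Expression)
  extends BinaryExpression with ExtractValue {
  def left: Expression = child
  def right: Expression = ordinal
}

case class GetMapValue(child: Expression, key: Expression)
  extends BinaryExpression with ExtractValue {
  def left: Expression = child
  def right: Expression = key
}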

Author: Reynold Xin <rx...@databricks.com>

Closes #7316 from rxin/extract-value and squashes the following commits:

069cb7e [Reynold Xin] Removed ExtractValueWithOrdinal.
621b705 [Reynold Xin] Reverted a line.
11ebd6c [Reynold Xin] [Minor][SQL] Improve documentation for complex type extractors.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e204d22b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e204d22b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e204d22b

Branch: refs/heads/master
Commit: e204d22bb70f28b1cc090ab60f12078479be4ae0
Parents: 59cc389
Author: Reynold Xin <rx...@databricks.com>
Authored: Thu Jul 9 10:01:01 2015 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Thu Jul 9 10:01:01 2015 -0700

----------------------------------------------------------------------
 .../sql/catalyst/expressions/ExtractValue.scala | 253 ------------------
 .../expressions/complexTypeExtractors.scala     | 267 +++++++++++++++++++
 2 files changed, 267 insertions(+), 253 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e204d22b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala
deleted file mode 100644
index 2b25ba0..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.expressions
-
-import scala.collection.Map
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions.codegen.{GeneratedExpressionCode, CodeGenContext}
-import org.apache.spark.sql.types._
-
-
-object ExtractValue {
-  /**
-   * Returns the resolved `ExtractValue`. It will return one kind of concrete `ExtractValue`,
-   * depending on the types of `child` and `extraction`.
-   *
-   *   `child`      |    `extraction`    |    concrete `ExtractValue`
-   * ----------------------------------------------------------------
-   *    Struct      |   Literal String   |        GetStructField
-   * Array[Struct]  |   Literal String   |     GetArrayStructFields
-   *    Array       |   Integral type    |         GetArrayItem
-   *     Map        |      Any type      |         GetMapValue
-   */
-  def apply(
-      child: Expression,
-      extraction: Expression,
-      resolver: Resolver): Expression = {
-
-    (child.dataType, extraction) match {
-      case (StructType(fields), NonNullLiteral(v, StringType)) =>
-        val fieldName = v.toString
-        val ordinal = findField(fields, fieldName, resolver)
-        GetStructField(child, fields(ordinal).copy(name = fieldName), ordinal)
-
-      case (ArrayType(StructType(fields), containsNull), NonNullLiteral(v, StringType)) =>
-        val fieldName = v.toString
-        val ordinal = findField(fields, fieldName, resolver)
-        GetArrayStructFields(child, fields(ordinal).copy(name = fieldName), ordinal, containsNull)
-
-      case (_: ArrayType, _) if extraction.dataType.isInstanceOf[IntegralType] =>
-        GetArrayItem(child, extraction)
-
-      case (_: MapType, _) =>
-        GetMapValue(child, extraction)
-
-      case (otherType, _) =>
-        val errorMsg = otherType match {
-          case StructType(_) | ArrayType(StructType(_), _) =>
-            s"Field name should be String Literal, but it's $extraction"
-          case _: ArrayType =>
-            s"Array index should be integral type, but it's ${extraction.dataType}"
-          case other =>
-            s"Can't extract value from $child"
-        }
-        throw new AnalysisException(errorMsg)
-    }
-  }
-
-  def unapply(g: ExtractValue): Option[(Expression, Expression)] = {
-    g match {
-      case o: ExtractValueWithOrdinal => Some((o.child, o.ordinal))
-      case s: ExtractValueWithStruct => Some((s.child, null))
-    }
-  }
-
-  /**
-   * Returns the ordinal of the requested StructField, reporting an error if no matching
-   * field is found or if more than one field matches.
-   */
-  private def findField(fields: Array[StructField], fieldName: String, resolver: Resolver): Int = {
-    val checkField = (f: StructField) => resolver(f.name, fieldName)
-    val ordinal = fields.indexWhere(checkField)
-    if (ordinal == -1) {
-      throw new AnalysisException(
-        s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}")
-    } else if (fields.indexWhere(checkField, ordinal + 1) != -1) {
-      throw new AnalysisException(
-        s"Ambiguous reference to fields ${fields.filter(checkField).mkString(", ")}")
-    } else {
-      ordinal
-    }
-  }
-}
-
-/**
- * A common interface for all kinds of extract value expressions.
- * Note: concrete extract value expressions are created only by `ExtractValue.apply`,
- * so we don't need to type check them.
- */
-trait ExtractValue {
-  self: Expression =>
-}
-
-abstract class ExtractValueWithStruct extends UnaryExpression with ExtractValue {
-  self: Product =>
-
-  def field: StructField
-  override def toString: String = s"$child.${field.name}"
-}
-
-/**
- * Returns the value of the field at `ordinal` in the Struct `child`.
- */
-case class GetStructField(child: Expression, field: StructField, ordinal: Int)
-  extends ExtractValueWithStruct {
-
-  override def dataType: DataType = field.dataType
-  override def nullable: Boolean = child.nullable || field.nullable
-
-  protected override def nullSafeEval(input: Any): Any =
-    input.asInstanceOf[InternalRow](ordinal)
-
-  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
-    nullSafeCodeGen(ctx, ev, eval => {
-      s"""
-        if ($eval.isNullAt($ordinal)) {
-          ${ev.isNull} = true;
-        } else {
-          ${ev.primitive} = ${ctx.getColumn(eval, dataType, ordinal)};
-        }
-      """
-    })
-  }
-}
-
-/**
- * Returns an array containing the value of the field at `ordinal` for each Struct in the Array `child`.
- */
-case class GetArrayStructFields(
-    child: Expression,
-    field: StructField,
-    ordinal: Int,
-    containsNull: Boolean) extends ExtractValueWithStruct {
-
-  override def dataType: DataType = ArrayType(field.dataType, containsNull)
-  override def nullable: Boolean = child.nullable || containsNull || field.nullable
-
-  protected override def nullSafeEval(input: Any): Any = {
-    input.asInstanceOf[Seq[InternalRow]].map { row =>
-      if (row == null) null else row(ordinal)
-    }
-  }
-
-  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
-    val arraySeqClass = "scala.collection.mutable.ArraySeq"
-    // TODO: consider using Array[_] for ArrayType child to avoid
-    // boxing of primitives
-    nullSafeCodeGen(ctx, ev, eval => {
-      s"""
-        final int n = $eval.size();
-        final $arraySeqClass<Object> values = new $arraySeqClass<Object>(n);
-        for (int j = 0; j < n; j++) {
-          InternalRow row = (InternalRow) $eval.apply(j);
-          if (row != null && !row.isNullAt($ordinal)) {
-            values.update(j, ${ctx.getColumn("row", field.dataType, ordinal)});
-          }
-        }
-        ${ev.primitive} = (${ctx.javaType(dataType)}) values;
-      """
-    })
-  }
-}
-
-abstract class ExtractValueWithOrdinal extends BinaryExpression with ExtractValue {
-  self: Product =>
-
-  def ordinal: Expression
-  def child: Expression
-
-  override def left: Expression = child
-  override def right: Expression = ordinal
-
-  /** `Null` is returned for invalid ordinals. */
-  override def nullable: Boolean = true
-  override def toString: String = s"$child[$ordinal]"
-}
-
-/**
- * Returns the element at index `ordinal` in the Array `child`.
- */
-case class GetArrayItem(child: Expression, ordinal: Expression)
-  extends ExtractValueWithOrdinal {
-
-  override def dataType: DataType = child.dataType.asInstanceOf[ArrayType].elementType
-
-  protected override def nullSafeEval(value: Any, ordinal: Any): Any = {
-    // TODO: consider using Array[_] for ArrayType child to avoid
-    // boxing of primitives
-    val baseValue = value.asInstanceOf[Seq[_]]
-    val index = ordinal.asInstanceOf[Number].intValue()
-    if (index >= baseValue.size || index < 0) {
-      null
-    } else {
-      baseValue(index)
-    }
-  }
-
-  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
-    nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
-      s"""
-        final int index = (int)$eval2;
-        if (index >= $eval1.size() || index < 0) {
-          ${ev.isNull} = true;
-        } else {
-          ${ev.primitive} = (${ctx.boxedType(dataType)})$eval1.apply(index);
-        }
-      """
-    })
-  }
-}
-
-/**
- * Returns the value for key `ordinal` in the Map `child`.
- */
-case class GetMapValue(child: Expression, ordinal: Expression)
-  extends ExtractValueWithOrdinal {
-
-  override def dataType: DataType = child.dataType.asInstanceOf[MapType].valueType
-
-  protected override def nullSafeEval(value: Any, ordinal: Any): Any = {
-    val baseValue = value.asInstanceOf[Map[Any, _]]
-    baseValue.get(ordinal).orNull
-  }
-
-  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
-    nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
-      s"""
-        if ($eval1.contains($eval2)) {
-          ${ev.primitive} = (${ctx.boxedType(dataType)})$eval1.apply($eval2);
-        } else {
-          ${ev.isNull} = true;
-        }
-      """
-    })
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/e204d22b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
new file mode 100644
index 0000000..73cc930
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import scala.collection.Map
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions.codegen.{GeneratedExpressionCode, CodeGenContext}
+import org.apache.spark.sql.types._
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// This file defines all the expressions to extract values out of complex types.
+// For example, getting a field out of an array, map, or struct.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+object ExtractValue {
+  /**
+   * Returns the resolved `ExtractValue`. It will return one kind of concrete `ExtractValue`,
+   * depending on the types of `child` and `extraction`.
+   *
+   *   `child`      |    `extraction`    |    concrete `ExtractValue`
+   * ----------------------------------------------------------------
+   *    Struct      |   Literal String   |        GetStructField
+   * Array[Struct]  |   Literal String   |     GetArrayStructFields
+   *    Array       |   Integral type    |         GetArrayItem
+   *     Map        |      Any type      |         GetMapValue
+   */
+  def apply(
+      child: Expression,
+      extraction: Expression,
+      resolver: Resolver): Expression = {
+
+    (child.dataType, extraction) match {
+      case (StructType(fields), NonNullLiteral(v, StringType)) =>
+        val fieldName = v.toString
+        val ordinal = findField(fields, fieldName, resolver)
+        GetStructField(child, fields(ordinal).copy(name = fieldName), ordinal)
+
+      case (ArrayType(StructType(fields), containsNull), NonNullLiteral(v, StringType)) =>
+        val fieldName = v.toString
+        val ordinal = findField(fields, fieldName, resolver)
+        GetArrayStructFields(child, fields(ordinal).copy(name = fieldName), ordinal, containsNull)
+
+      case (_: ArrayType, _) if extraction.dataType.isInstanceOf[IntegralType] =>
+        GetArrayItem(child, extraction)
+
+      case (_: MapType, _) =>
+        GetMapValue(child, extraction)
+
+      case (otherType, _) =>
+        val errorMsg = otherType match {
+          case StructType(_) | ArrayType(StructType(_), _) =>
+            s"Field name should be String Literal, but it's $extraction"
+          case _: ArrayType =>
+            s"Array index should be integral type, but it's ${extraction.dataType}"
+          case other =>
+            s"Can't extract value from $child"
+        }
+        throw new AnalysisException(errorMsg)
+    }
+  }
+
+  def unapply(g: ExtractValue): Option[(Expression, Expression)] = g match {
+    case o: GetArrayItem => Some((o.child, o.ordinal))
+    case o: GetMapValue => Some((o.child, o.key))
+    case s: ExtractValueWithStruct => Some((s.child, null))
+  }
+
+  /**
+   * Returns the ordinal of the requested StructField, reporting an error if no matching
+   * field is found or if more than one field matches.
+   */
+  private def findField(fields: Array[StructField], fieldName: String, resolver: Resolver): Int = {
+    val checkField = (f: StructField) => resolver(f.name, fieldName)
+    val ordinal = fields.indexWhere(checkField)
+    if (ordinal == -1) {
+      throw new AnalysisException(
+        s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}")
+    } else if (fields.indexWhere(checkField, ordinal + 1) != -1) {
+      throw new AnalysisException(
+        s"Ambiguous reference to fields ${fields.filter(checkField).mkString(", ")}")
+    } else {
+      ordinal
+    }
+  }
+}
+
+/**
+ * A common interface for all kinds of extract value expressions.
+ * Note: concrete extract value expressions are created only by `ExtractValue.apply`,
+ * so we don't need to type check them.
+ */
+trait ExtractValue {
+  self: Expression =>
+}
+
+abstract class ExtractValueWithStruct extends UnaryExpression with ExtractValue {
+  self: Product =>
+
+  def field: StructField
+  override def toString: String = s"$child.${field.name}"
+}
+
+/**
+ * Returns the value of the field at `ordinal` in the Struct `child`.
+ *
+ * No need to do type checking since it is handled by [[ExtractValue]].
+ */
+case class GetStructField(child: Expression, field: StructField, ordinal: Int)
+  extends ExtractValueWithStruct {
+
+  override def dataType: DataType = field.dataType
+  override def nullable: Boolean = child.nullable || field.nullable
+
+  protected override def nullSafeEval(input: Any): Any =
+    input.asInstanceOf[InternalRow](ordinal)
+
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    nullSafeCodeGen(ctx, ev, eval => {
+      s"""
+        if ($eval.isNullAt($ordinal)) {
+          ${ev.isNull} = true;
+        } else {
+          ${ev.primitive} = ${ctx.getColumn(eval, dataType, ordinal)};
+        }
+      """
+    })
+  }
+}
+
+/**
+ * Returns an array containing the value of the field at `ordinal` for each Struct in the Array `child`.
+ *
+ * No need to do type checking since it is handled by [[ExtractValue]].
+ */
+case class GetArrayStructFields(
+    child: Expression,
+    field: StructField,
+    ordinal: Int,
+    containsNull: Boolean) extends ExtractValueWithStruct {
+
+  override def dataType: DataType = ArrayType(field.dataType, containsNull)
+  override def nullable: Boolean = child.nullable || containsNull || field.nullable
+
+  protected override def nullSafeEval(input: Any): Any = {
+    input.asInstanceOf[Seq[InternalRow]].map { row =>
+      if (row == null) null else row(ordinal)
+    }
+  }
+
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    val arraySeqClass = "scala.collection.mutable.ArraySeq"
+    // TODO: consider using Array[_] for ArrayType child to avoid
+    // boxing of primitives
+    nullSafeCodeGen(ctx, ev, eval => {
+      s"""
+        final int n = $eval.size();
+        final $arraySeqClass<Object> values = new $arraySeqClass<Object>(n);
+        for (int j = 0; j < n; j++) {
+          InternalRow row = (InternalRow) $eval.apply(j);
+          if (row != null && !row.isNullAt($ordinal)) {
+            values.update(j, ${ctx.getColumn("row", field.dataType, ordinal)});
+          }
+        }
+        ${ev.primitive} = (${ctx.javaType(dataType)}) values;
+      """
+    })
+  }
+}
+
+/**
+ * Returns the element at index `ordinal` in the Array `child`.
+ *
+ * No need to do type checking since it is handled by [[ExtractValue]].
+ */
+case class GetArrayItem(child: Expression, ordinal: Expression)
+  extends BinaryExpression with ExtractValue {
+
+  override def toString: String = s"$child[$ordinal]"
+
+  override def left: Expression = child
+  override def right: Expression = ordinal
+
+  /** `Null` is returned for invalid ordinals. */
+  override def nullable: Boolean = true
+
+  override def dataType: DataType = child.dataType.asInstanceOf[ArrayType].elementType
+
+  protected override def nullSafeEval(value: Any, ordinal: Any): Any = {
+    // TODO: consider using Array[_] for ArrayType child to avoid
+    // boxing of primitives
+    val baseValue = value.asInstanceOf[Seq[_]]
+    val index = ordinal.asInstanceOf[Number].intValue()
+    if (index >= baseValue.size || index < 0) {
+      null
+    } else {
+      baseValue(index)
+    }
+  }
+
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
+      s"""
+        final int index = (int)$eval2;
+        if (index >= $eval1.size() || index < 0) {
+          ${ev.isNull} = true;
+        } else {
+          ${ev.primitive} = (${ctx.boxedType(dataType)})$eval1.apply(index);
+        }
+      """
+    })
+  }
+}
+
+/**
+ * Returns the value for the given `key` in the Map `child`.
+ *
+ * No need to do type checking since it is handled by [[ExtractValue]].
+ */
+case class GetMapValue(child: Expression, key: Expression)
+  extends BinaryExpression with ExtractValue {
+
+  override def toString: String = s"$child[$key]"
+
+  override def left: Expression = child
+  override def right: Expression = key
+
+  /** `Null` is returned if the `key` is not found in the map. */
+  override def nullable: Boolean = true
+
+  override def dataType: DataType = child.dataType.asInstanceOf[MapType].valueType
+
+  protected override def nullSafeEval(value: Any, ordinal: Any): Any = {
+    val baseValue = value.asInstanceOf[Map[Any, _]]
+    baseValue.get(ordinal).orNull
+  }
+
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
+      s"""
+        if ($eval1.contains($eval2)) {
+          ${ev.primitive} = (${ctx.boxedType(dataType)})$eval1.apply($eval2);
+        } else {
+          ${ev.isNull} = true;
+        }
+      """
+    })
+  }
+}
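
As a quick illustration of the resolution table in the new ExtractValue scaladoc above, the dispatch can be sketched against the internal Catalyst API roughly as follows. The attribute names and schemas are invented for the example, and the resolver is a hand-rolled case-insensitive one rather than the analyzer's own:

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

// Illustrative attributes; names and schemas are made up for the example.
val person = AttributeReference("person",
  StructType(Seq(StructField("name", StringType))))()
val tags = AttributeReference("tags", ArrayType(IntegerType))()
val settings = AttributeReference("settings", MapType(StringType, StringType))()

// Resolver is just (String, String) => Boolean in catalyst.analysis.
val resolver = (a: String, b: String) => a.equalsIgnoreCase(b)

ExtractValue(person, Literal("name"), resolver)      // resolves to GetStructField
ExtractValue(tags, Literal(0), resolver)             // resolves to GetArrayItem
ExtractValue(settings, Literal("timeout"), resolver) // resolves to GetMapValue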

