You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2022/06/10 17:47:53 UTC
[spark] branch master updated: [SPARK-39434][SQL] Provide runtime error query context when array index is out of bounding
This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c73c9c029bc [SPARK-39434][SQL] Provide runtime error query context when array index is out of bounding
c73c9c029bc is described below
commit c73c9c029bcaca79964438a9c166ab218eb6351d
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Fri Jun 10 10:47:17 2022 -0700
[SPARK-39434][SQL] Provide runtime error query context when array index is out of bounding
### What changes were proposed in this pull request?
Provide the runtime error query context when an array index is out of bounds, including
* operator `[]`
* function element_at
* function elt
### Why are the changes needed?
Provide the SQL query context of runtime errors to users, so that they can understand them better.
### Does this PR introduce _any_ user-facing change?
Yes, this improves the runtime error message for the array-index-out-of-bounds error
### How was this patch tested?
UT
Closes #36826 from gengliangwang/arrayOutofIndex.
Authored-by: Gengliang Wang <ge...@apache.org>
Signed-off-by: Gengliang Wang <ge...@apache.org>
---
.../scala/org/apache/spark/SparkException.scala | 7 +++--
.../expressions/collectionOperations.scala | 8 +++--
.../expressions/complexTypeExtractors.scala | 21 ++++++++++---
.../catalyst/expressions/stringExpressions.scala | 14 +++++++--
.../spark/sql/errors/QueryExecutionErrors.scala | 25 +++++++--------
.../resources/sql-tests/results/ansi/array.sql.out | 36 ++++++++++++++++++++++
6 files changed, 86 insertions(+), 25 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala
index 53942054f1b..739d2aab23b 100644
--- a/core/src/main/scala/org/apache/spark/SparkException.scala
+++ b/core/src/main/scala/org/apache/spark/SparkException.scala
@@ -371,9 +371,12 @@ private[spark] class SparkSecurityException(
private[spark] class SparkArrayIndexOutOfBoundsException(
errorClass: String,
errorSubClass: Option[String] = None,
- messageParameters: Array[String])
+ messageParameters: Array[String],
+ queryContext: String)
extends ArrayIndexOutOfBoundsException(
- SparkThrowableHelper.getMessage(errorClass, errorSubClass.orNull, messageParameters))
+ // scalastyle:off line.size.limit
+ SparkThrowableHelper.getMessage(errorClass, errorSubClass.orNull, messageParameters, queryContext))
+ // scalastyle:on line.size.limit
with SparkThrowable {
override def getMessageParameters: Array[String] = messageParameters
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index f38beb480e6..9254cbefeb2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -2177,7 +2177,8 @@ case class ElementAt(
val index = ordinal.asInstanceOf[Int]
if (array.numElements() < math.abs(index)) {
if (failOnError) {
- throw QueryExecutionErrors.invalidElementAtIndexError(index, array.numElements())
+ throw QueryExecutionErrors.invalidElementAtIndexError(
+ index, array.numElements(), queryContext)
} else {
defaultValueOutOfBound match {
case Some(value) => value.eval()
@@ -2219,7 +2220,10 @@ case class ElementAt(
}
val indexOutOfBoundBranch = if (failOnError) {
- s"throw QueryExecutionErrors.invalidElementAtIndexError($index, $eval1.numElements());"
+ val errorContext = ctx.addReferenceObj("errCtx", queryContext)
+ // scalastyle:off line.size.limit
+ s"throw QueryExecutionErrors.invalidElementAtIndexError($index, $eval1.numElements(), $errorContext);"
+ // scalastyle:on line.size.limit
} else {
defaultValueOutOfBound match {
case Some(value) =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index b84050c1837..b2db00cd2b4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -235,8 +235,11 @@ case class GetArrayStructFields(
case class GetArrayItem(
child: Expression,
ordinal: Expression,
- failOnError: Boolean = SQLConf.get.strictIndexOperator)
- extends BinaryExpression with GetArrayItemUtil with ExpectsInputTypes with ExtractValue {
+ failOnError: Boolean = SQLConf.get.strictIndexOperator) extends BinaryExpression
+ with GetArrayItemUtil
+ with ExpectsInputTypes
+ with ExtractValue
+ with SupportQueryContext {
// We have done type checking for child in `ExtractValue`, so only need to check the `ordinal`.
override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, IntegralType)
@@ -263,7 +266,8 @@ case class GetArrayItem(
val index = ordinal.asInstanceOf[Number].intValue()
if (index >= baseValue.numElements() || index < 0) {
if (failOnError) {
- throw QueryExecutionErrors.invalidArrayIndexError(index, baseValue.numElements)
+ throw QueryExecutionErrors.invalidArrayIndexError(
+ index, baseValue.numElements, queryContext)
} else {
null
}
@@ -287,7 +291,10 @@ case class GetArrayItem(
}
val indexOutOfBoundBranch = if (failOnError) {
- s"throw QueryExecutionErrors.invalidArrayIndexError($index, $eval1.numElements());"
+ val errorContext = ctx.addReferenceObj("errCtx", queryContext)
+ // scalastyle:off line.size.limit
+ s"throw QueryExecutionErrors.invalidArrayIndexError($index, $eval1.numElements(), $errorContext);"
+ // scalastyle:on line.size.limit
} else {
s"${ev.isNull} = true;"
}
@@ -306,6 +313,12 @@ case class GetArrayItem(
override protected def withNewChildrenInternal(
newLeft: Expression, newRight: Expression): GetArrayItem =
copy(child = newLeft, ordinal = newRight)
+
+ override def initQueryContext(): String = if (failOnError) {
+ origin.context
+ } else {
+ ""
+ }
}
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index c56a1dc47ae..a4c5af582fa 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -258,7 +258,8 @@ case class ConcatWs(children: Seq[Expression])
// scalastyle:on line.size.limit
case class Elt(
children: Seq[Expression],
- failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression {
+ failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression
+ with SupportQueryContext {
def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled)
@@ -297,7 +298,7 @@ case class Elt(
val index = indexObj.asInstanceOf[Int]
if (index <= 0 || index > inputExprs.length) {
if (failOnError) {
- throw QueryExecutionErrors.invalidInputIndexError(index, inputExprs.length)
+ throw QueryExecutionErrors.invalidArrayIndexError(index, inputExprs.length, queryContext)
} else {
null
}
@@ -349,10 +350,11 @@ case class Elt(
}.mkString)
val indexOutOfBoundBranch = if (failOnError) {
+ val errorContext = ctx.addReferenceObj("errCtx", queryContext)
// scalastyle:off line.size.limit
s"""
|if (!$indexMatched) {
- | throw QueryExecutionErrors.invalidInputIndexError(${index.value}, ${inputExprs.length});
+ | throw QueryExecutionErrors.invalidArrayIndexError(${index.value}, ${inputExprs.length}, $errorContext);
|}
""".stripMargin
// scalastyle:on line.size.limit
@@ -381,6 +383,12 @@ case class Elt(
override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Elt =
copy(children = newChildren)
+
+ override def initQueryContext(): String = if (failOnError) {
+ origin.context
+ } else {
+ ""
+ }
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 2b573b2385c..657c7c74b9e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -184,34 +184,31 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
queryContext = context)
}
- def invalidArrayIndexError(index: Int, numElements: Int): ArrayIndexOutOfBoundsException = {
- invalidArrayIndexErrorInternal(index, numElements, SQLConf.ANSI_ENABLED.key)
- }
-
- def invalidInputIndexError(index: Int, numElements: Int): ArrayIndexOutOfBoundsException = {
- invalidArrayIndexErrorInternal(index, numElements, SQLConf.ANSI_ENABLED.key)
- }
-
- private def invalidArrayIndexErrorInternal(
+ def invalidArrayIndexError(
index: Int,
numElements: Int,
- key: String): ArrayIndexOutOfBoundsException = {
+ context: String): ArrayIndexOutOfBoundsException = {
new SparkArrayIndexOutOfBoundsException(
errorClass = "INVALID_ARRAY_INDEX",
messageParameters = Array(
- toSQLValue(index, IntegerType), toSQLValue(numElements, IntegerType), toSQLConf(key)))
+ toSQLValue(index, IntegerType),
+ toSQLValue(numElements, IntegerType),
+ toSQLConf(SQLConf.ANSI_ENABLED.key)),
+ queryContext = context)
}
def invalidElementAtIndexError(
- index: Int,
- numElements: Int): ArrayIndexOutOfBoundsException = {
+ index: Int,
+ numElements: Int,
+ context: String): ArrayIndexOutOfBoundsException = {
new SparkArrayIndexOutOfBoundsException(
errorClass = "INVALID_ARRAY_INDEX_IN_ELEMENT_AT",
messageParameters =
Array(
toSQLValue(index, IntegerType),
toSQLValue(numElements, IntegerType),
- toSQLConf(SQLConf.ANSI_ENABLED.key)))
+ toSQLConf(SQLConf.ANSI_ENABLED.key)),
+ queryContext = context)
}
def mapKeyNotExistError(key: Any, dataType: DataType, context: String): NoSuchElementException = {
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out
index 889523537f4..f94a740f790 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out
@@ -169,6 +169,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX_IN_ELEMENT_AT] The index 5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select element_at(array(1, 2, 3), 5)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -178,6 +181,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX_IN_ELEMENT_AT] The index -5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select element_at(array(1, 2, 3), -5)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -196,6 +202,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX] The index 4 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select elt(4, '123', '456')
+ ^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -205,6 +214,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX] The index 0 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select elt(0, '123', '456')
+ ^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -214,6 +226,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX] The index -1 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select elt(-1, '123', '456')
+ ^^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -255,6 +270,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX] The index 5 is out of bounds. The array has 3 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select array(1, 2, 3)[5]
+ ^^^^^^^^^^^^^^^^^
-- !query
@@ -264,6 +282,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX] The index -1 is out of bounds. The array has 3 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select array(1, 2, 3)[-1]
+ ^^^^^^^^^^^^^^^^^^
-- !query
@@ -338,6 +359,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX_IN_ELEMENT_AT] The index 5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select element_at(array(1, 2, 3), 5)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -347,6 +371,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX_IN_ELEMENT_AT] The index -5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select element_at(array(1, 2, 3), -5)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -365,6 +392,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX] The index 4 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select elt(4, '123', '456')
+ ^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -374,6 +404,9 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX] The index 0 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select elt(0, '123', '456')
+ ^^^^^^^^^^^^^^^^^^^^
-- !query
@@ -383,3 +416,6 @@ struct<>
-- !query output
org.apache.spark.SparkArrayIndexOutOfBoundsException
[INVALID_ARRAY_INDEX] The index -1 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 8) ==
+select elt(-1, '123', '456')
+ ^^^^^^^^^^^^^^^^^^^^^
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org