You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "angerszhu (Jira)" <ji...@apache.org> on 2020/06/18 09:13:00 UTC

[jira] [Commented] (SPARK-32002) spark error while select nest data

    [ https://issues.apache.org/jira/browse/SPARK-32002?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17139248#comment-17139248 ] 

angerszhu commented on SPARK-32002:
-----------------------------------

{code:java}
// code placeholder
scala> sql("select a.b from nest_table").schema
res6: org.apache.spark.sql.types.StructType = StructType(StructField(b,ArrayType(ArrayType(StructType(StructField(c,ArrayType(LongType,true),true)),true),true),true))

scala> sql("select a from nest_table").schema
res7: org.apache.spark.sql.types.StructType = StructType(StructField(a,ArrayType(StructType(StructField(b,ArrayType(StructType(StructField(c,ArrayType(LongType,true),true)),true),true)),true),true))

scala> sql("select a.b[0].c from nest_table").show()
+----------------+
|a.b AS b#34[0].c|
+----------------+
|        [[1, 2]]|
|      [[1], [2]]|
+----------------+
scala>{code}

> spark error while select nest data
> ----------------------------------
>
>                 Key: SPARK-32002
>                 URL: https://issues.apache.org/jira/browse/SPARK-32002
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.4.4
>            Reporter: Yiqun Zhang
>            Priority: Major
>
> nest-data.json
> {code:java}
> {"a": [{"b": [{"c": [1,2]}]}]}
> {"a": [{"b": [{"c": [1]}, {"c": [2]}]}]}{code}
> {code:java}
> val df: DataFrame = spark.read.json(testFile("nest-data.json"))
> df.createTempView("nest_table")
> sql("select a.b.c from nest_table").show()
> {code}
> {color:#ff0000}org.apache.spark.sql.AnalysisException: cannot resolve 'nest_table.`a`.`b`['c']' due to data type mismatch: argument 2 requires integral type, however, ''c'' is of string type.; line 1 pos 7;{color}
>  {color:#ff0000}'Project [a#6.b[c] AS c#8]{color}
>  {color:#ff0000}+- SubqueryAlias `nest_table`{color}
>  {color:#ff0000} +- Relation[a#6] json{color}
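> {color:#172b4d}Root cause: ExtractValue chooses the extractor for "c" by matching on the dataType of the child expression a.b. Because a.b is itself produced by GetArrayStructFields, its type is ArrayType(ArrayType(StructType(...))), which does not match the ArrayType(StructType(...)) case and instead falls through to the generic ArrayType case, i.e. {color}GetArrayItem. As a result the extraction ("c") is treated as an array ordinal rather than a field name, and the analysis fails because a string is not an integral type.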
> org.apache.spark.sql.catalyst.expressions.ExtractValue
> {code:java}
> def apply(
>       child: Expression,
>       extraction: Expression,
>       resolver: Resolver): Expression = {    
>    (child.dataType, extraction) match {
>       case (StructType(fields), NonNullLiteral(v, StringType)) =>
>         val fieldName = v.toString
>         val ordinal = findField(fields, fieldName, resolver)
>         GetStructField(child, ordinal, Some(fieldName))      
>       case (ArrayType(StructType(fields), containsNull), NonNullLiteral(v, StringType)) =>
>         val fieldName = v.toString
>         val ordinal = findField(fields, fieldName, resolver)
>         GetArrayStructFields(child, fields(ordinal).copy(name = fieldName),
>           ordinal, fields.length, containsNull)      
>       case (_: ArrayType, _) => GetArrayItem(child, extraction)      
>       case (MapType(kt, _, _), _) => GetMapValue(child, extraction)      
>       case (otherType, _) =>
>         val errorMsg = otherType match {
>           case StructType(_) =>
>             s"Field name should be String Literal, but it's $extraction"
>           case other =>
>             s"Can't extract value from $child: need struct type but got ${other.catalogString}"
>         }
>         throw new AnalysisException(errorMsg)
>     }
>   }{code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org