Posted to issues@spark.apache.org by "Yiqun Zhang (Jira)" <ji...@apache.org> on 2020/06/16 13:14:00 UTC

[jira] [Updated] (SPARK-32002) Spark error when selecting nested data

     [ https://issues.apache.org/jira/browse/SPARK-32002?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Yiqun Zhang updated SPARK-32002:
--------------------------------
    Description: 
nest-data.json
{code:java}
{"a": [{"b": [{"c": [1,2]}]}]}
{"a": [{"b": [{"c": [1]}, {"c": [2]}]}]}{code}
{code:java}
val df: DataFrame = spark.read.json(testFile("nest-data.json"))
df.createTempView("nest_table")
sql("select a.b.c from nest_table").show()

{code}
{color:#ff0000}org.apache.spark.sql.AnalysisException: cannot resolve 'nest_table.`a`.`b`['c']' due to data type mismatch: argument 2 requires integral type, however, ''c'' is of string type.; line 1 pos 7;{color}
 {color:#ff0000}'Project [a#6.b[c] AS c#8]{color}
 {color:#ff0000}+- SubqueryAlias `nest_table`{color}
 {color:#ff0000} +- Relation[a#6] json{color}
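For reference, the schema Spark infers for nest-data.json (a sketch assuming default JSON schema inference, which maps the integer values to long):
{code:java}
// Sketch: inspect the inferred schema of the repro DataFrame.
// With default JSON inference the output is roughly:
df.printSchema()
// root
//  |-- a: array (nullable = true)
//  |    |-- element: struct (containsNull = true)
//  |    |    |-- b: array (nullable = true)
//  |    |    |    |-- element: struct (containsNull = true)
//  |    |    |    |    |-- c: array (nullable = true)
//  |    |    |    |    |    |-- element: long (containsNull = true)
{code}
So `a.b` has type array<array<struct<c: array<long>>>>, which is the shape that trips the extractor dispatch analysed below.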

{color:#172b4d}Analysing the cause: the extractor for `c` is chosen by matching on the data type of the `a.b` expression. Because `a.b` is itself resolved with GetArrayStructFields, its result type is ArrayType(ArrayType(StructType(...))). That type only matches the generic ArrayType branch, which builds GetArrayItem, so the extraction "c" is treated as an array ordinal rather than a field name, which produces the "argument 2 requires integral type" error.{color}
{code:java}
def apply(
      child: Expression,
      extraction: Expression,
      resolver: Resolver): Expression = {
    (child.dataType, extraction) match {
      case (StructType(fields), NonNullLiteral(v, StringType)) =>
        val fieldName = v.toString
        val ordinal = findField(fields, fieldName, resolver)
        GetStructField(child, ordinal, Some(fieldName))

      case (ArrayType(StructType(fields), containsNull), NonNullLiteral(v, StringType)) =>
        val fieldName = v.toString
        val ordinal = findField(fields, fieldName, resolver)
        GetArrayStructFields(child, fields(ordinal).copy(name = fieldName),
          ordinal, fields.length, containsNull)

      // For a.b.c the child type is ArrayType(ArrayType(StructType(...))), so only
      // this generic case matches and the string literal "c" becomes an array index.
      case (_: ArrayType, _) => GetArrayItem(child, extraction)

      case (MapType(kt, _, _), _) => GetMapValue(child, extraction)

      case (otherType, _) =>
        val errorMsg = otherType match {
          case StructType(_) =>
            s"Field name should be String Literal, but it's $extraction"
          case other =>
            s"Can't extract value from $child: need struct type but got ${other.catalogString}"
        }
        throw new AnalysisException(errorMsg)
    }
  }{code}
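A possible workaround on 2.4 (a hedged sketch, not a fix for the resolution logic itself): explode the outer array first, so that `b` is extracted from a struct and `b.c` resolves through GetStructField and GetArrayStructFields.
{code:java}
// Workaround sketch, reusing the df from the repro above.
// After explode, a_elem is a struct, so a_elem.b.c resolves via
// GetStructField + GetArrayStructFields instead of GetArrayItem.
import org.apache.spark.sql.functions._

df.select(explode(col("a")).as("a_elem"))
  .select(col("a_elem.b.c").as("c"))
  .show(false)
{code}
The SQL equivalent would use LATERAL VIEW explode(a) on nest_table.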
 

> Spark error when selecting nested data
> --------------------------------------
>
>                 Key: SPARK-32002
>                 URL: https://issues.apache.org/jira/browse/SPARK-32002
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.4.4
>            Reporter: Yiqun Zhang
>            Priority: Minor
>



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org