Posted to issues@spark.apache.org by "Apache Spark (Jira)" <ji...@apache.org> on 2020/10/28 12:30:00 UTC

[jira] [Assigned] (SPARK-33267) Query with null in "in" condition against data source V2 source table supporting push down filter fails with NPE

     [ https://issues.apache.org/jira/browse/SPARK-33267?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Apache Spark reassigned SPARK-33267:
------------------------------------

    Assignee: Apache Spark

> Query with null in "in" condition against data source V2 source table supporting push down filter fails with NPE
> -----------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-33267
>                 URL: https://issues.apache.org/jira/browse/SPARK-33267
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.0.0, 3.0.1, 3.1.0
>            Reporter: Jungtaek Lim
>            Assignee: Apache Spark
>            Priority: Major
>
> A query with a null literal in an "in" condition against a data source V2 table that supports filter pushdown fails with a NullPointerException.
> {code}
> scala> spark.sql("SELECT * FROM catalog.default.t1 WHERE id IN (1, null)").show()
> java.lang.NullPointerException
>   at org.apache.spark.sql.sources.In.$anonfun$hashCode$1(filters.scala:167)
>   at org.apache.spark.sql.sources.In.$anonfun$hashCode$1$adapted(filters.scala:165)
>   at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
>   at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
>   at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
>   at org.apache.spark.sql.sources.In.hashCode(filters.scala:165)
>   at scala.runtime.Statics.anyHash(Statics.java:122)
>   at scala.collection.mutable.HashTable$HashUtils.elemHashCode(HashTable.scala:416)
>   at scala.collection.mutable.HashTable$HashUtils.elemHashCode$(HashTable.scala:416)
>   at scala.collection.mutable.HashMap.elemHashCode(HashMap.scala:44)
>   at scala.collection.mutable.HashTable.findOrAddEntry(HashTable.scala:168)
>   at scala.collection.mutable.HashTable.findOrAddEntry$(HashTable.scala:167)
>   at scala.collection.mutable.HashMap.findOrAddEntry(HashMap.scala:44)
>   at scala.collection.mutable.HashMap.put(HashMap.scala:126)
>   at scala.collection.mutable.HashMap.update(HashMap.scala:131)
>   at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.translateFilterWithMapping(DataSourceStrategy.scala:576)
>   at org.apache.spark.sql.execution.datasources.v2.PushDownUtils$.$anonfun$pushFilters$1(PushDownUtils.scala:52)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at org.apache.spark.sql.execution.datasources.v2.PushDownUtils$.pushFilters(PushDownUtils.scala:49)
>   at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$$anonfun$apply$1.applyOrElse(V2ScanRelationPushDown.scala:44)
>   at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$$anonfun$apply$1.applyOrElse(V2ScanRelationPushDown.scala:32)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$1(TreeNode.scala:309)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:309)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:149)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:147)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:399)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:237)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:397)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:350)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:149)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:147)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:399)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:237)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:397)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:350)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:149)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:147)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.apply(V2ScanRelationPushDown.scala:32)
>   at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.apply(V2ScanRelationPushDown.scala:29)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:149)
>   at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
>   at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
>   at scala.collection.immutable.List.foldLeft(List.scala:89)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:146)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:138)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:138)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:116)
>   at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:116)
>   at org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:82)
>   at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
>   at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:133)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
>   at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:133)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:82)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:79)
>   at org.apache.spark.sql.execution.QueryExecution.$anonfun$writePlans$4(QueryExecution.scala:197)
>   at org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:381)
>   at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$writePlans(QueryExecution.scala:197)
>   at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:207)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:95)
>   at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3616)
>   at org.apache.spark.sql.Dataset.head(Dataset.scala:2697)
>   at org.apache.spark.sql.Dataset.take(Dataset.scala:2904)
>   at org.apache.spark.sql.Dataset.getRows(Dataset.scala:300)
>   at org.apache.spark.sql.Dataset.showString(Dataset.scala:337)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:824)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:783)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:792)
> {code}
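> Per the stack trace, the NPE is raised while the translated filter is being used as a key in a mutable HashMap inside DataSourceStrategy.translateFilterWithMapping: computing the key's hash code walks the IN-list values and dereferences each one, so the null literal blows up before the filter ever reaches the source. Below is a minimal, self-contained sketch (simplified stand-in names, not the actual Spark source) that reproduces the same pattern:
> {code}
> import scala.collection.mutable
>
> // Simplified stand-in for org.apache.spark.sql.sources.In (not the real class):
> // hashCode walks the values array and calls hashCode() on every element
> // without checking for null.
> case class InFilter(attribute: String, values: Array[Any]) {
>   override def hashCode(): Int = {
>     var h = attribute.hashCode
>     values.foreach { v => h = 31 * h + v.hashCode() } // NPE when v == null
>     h
>   }
> }
>
> object Repro {
>   def main(args: Array[String]): Unit = {
>     // Mirrors translateFilterWithMapping caching the translated filter in a
>     // mutable HashMap, which forces hashCode() on the key.
>     val translatedFilterToExpr = mutable.HashMap.empty[InFilter, String]
>     translatedFilterToExpr(InFilter("id", Array(1, null))) = "id IN (1, null)" // throws NullPointerException
>   }
> }
> {code}
> In this sketch the exception is thrown as soon as the HashMap evaluates the key's hash, which matches the In.hashCode frame at the top of the stack trace above.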



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org