You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "ShiHang Gao (Jira)" <ji...@apache.org> on 2022/01/26 15:50:00 UTC
[jira] [Updated] (SPARK-38038) DataSourceV2 OrcTable can't support read partition doesn't contains "="
[ https://issues.apache.org/jira/browse/SPARK-38038?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ShiHang Gao updated SPARK-38038:
--------------------------------
Summary: DataSourceV2 OrcTable can't support read partition doesn't contains "=" (was: DataSourceV2 ORCTable can't support read partition doesn't contains "=")
> DataSourceV2 OrcTable can't support read partition doesn't contains "="
> -----------------------------------------------------------------------
>
> Key: SPARK-38038
> URL: https://issues.apache.org/jira/browse/SPARK-38038
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.0.1
> Reporter: ShiHang Gao
> Priority: Critical
> Labels: bulk-closed
>
> I want to use the DataSource V2 OrcTable to read a Hive partitioned table. The partition directories of the table are named with bare values (e.g. 20220103) rather than in "key=value" form, as shown below:
> {code:java}
> hadoop fs -ls /user/hive/warehouse/f_audience_cpx_daily
> Found 2 items
> /user/hive/warehouse/f_audience_cpx_daily/20220103
> /user/hive/warehouse/f_audience_cpx_daily/20220104 {code}
> When I read the table, I get the following error:
> {code:java}
> Exception in thread "main" org.apache.spark.sql.AnalysisException: Unable to infer schema for ORC. It must be specified manually.;
> at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$5(FileTable.scala:71)
> at scala.Option.getOrElse(Option.scala:189)
> at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:71)
> at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:63)
> at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:82)
> at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:80)
> at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation$.create(DataSourceV2Relation.scala:150)
> at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation$.create(DataSourceV2Relation.scala:158)
> at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTables$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveTables$$lookupV2Relation(Analyzer.scala:924)
> at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTables$$anonfun$apply$8.applyOrElse(Analyzer.scala:886)
> at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTables$$anonfun$apply$8.applyOrElse(Analyzer.scala:884)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUp$3(AnalysisHelper.scala:90)
> at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsUp$1(AnalysisHelper.scala:90)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp(AnalysisHelper.scala:86)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsUp$(AnalysisHelper.scala:84)
> at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
> at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTables$.apply(Analyzer.scala:884)
> at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTables$.apply(Analyzer.scala:883)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:149)
> at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
> at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
> at scala.collection.immutable.List.foldLeft(List.scala:89)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:146)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:138)
> at scala.collection.immutable.List.foreach(List.scala:392)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:138)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:176)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:170)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:130)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:116)
> at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:116)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:154)
> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
> at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:153)
> at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:68)
> at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
> at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:133)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
> at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:133)
> at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:68)
> at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:66)
> at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:58)
> at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:91)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:89)
> at org.apache.spark.sql.SparkSession.table(SparkSession.scala:585)
> at org.apache.spark.sql.SparkSession.table(SparkSession.scala:581) {code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org