You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Yuming Wang (Jira)" <ji...@apache.org> on 2022/03/30 08:47:00 UTC
[jira] [Commented] (SPARK-38695) ORC can not support data types such as char or varchar
[ https://issues.apache.org/jira/browse/SPARK-38695?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17514547#comment-17514547 ]
Yuming Wang commented on SPARK-38695:
-------------------------------------
How to reproduce this issue?
> ORC can not support data types such as char or varchar
> --------------------------------------------------------
>
> Key: SPARK-38695
> URL: https://issues.apache.org/jira/browse/SPARK-38695
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.1.2, 3.2.1
> Reporter: jacky
> Priority: Major
>
> When testing Spark performance with TPC-DS, running some SQL queries such as q1, I found this error:
> java.lang.UnsupportedOperationException: DataType: char(2)
> at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.getPredicateLeafType(OrcFilters.scala:150)
> at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.getType$1(OrcFilters.scala:222)
> at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.buildLeafSearchArgument(OrcFilters.scala:266)
> at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.convertibleFiltersHelper$1(OrcFilters.scala:132)
> at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.$anonfun$convertibleFilters$4(OrcFilters.scala:135)
> at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
> at scala.collection.immutable.List.foreach(List.scala:392)
> at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
> at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
> at scala.collection.immutable.List.flatMap(List.scala:355)
> at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.convertibleFilters(OrcFilters.scala:134)
> at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.createFilter(OrcFilters.scala:73)
> at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat.$anonfun$buildReaderWithPartitionValues$4(OrcFileFormat.scala:189)
> at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat.$anonfun$buildReaderWithPartitionValues$4$adapted(OrcFileFormat.scala
>
> I used the following SQL to create the tables, such as:
> create table customer
> stored as orc
> as select * from tpdc_text.customer
> CLUSTER BY c_customer_sk
>
> create table store
> stored as orc
> as select * from tpdc_text.store
> CLUSTER BY s_store_sk
>
> create table date_dim
> stored as orc
> as select * from tpdc_text.date_dim;
>
> create table store_returns
> (
> sr_return_time_sk bigint
> , sr_item_sk bigint
> , sr_customer_sk bigint
> , sr_cdemo_sk bigint
> , sr_hdemo_sk bigint
> , sr_addr_sk bigint
> , sr_store_sk bigint
> , sr_reason_sk bigint
> , sr_ticket_number bigint
> , sr_return_quantity int
> , sr_return_amt decimal(7,2)
> , sr_return_tax decimal(7,2)
> , sr_return_amt_inc_tax decimal(7,2)
> , sr_fee decimal(7,2)
> , sr_return_ship_cost decimal(7,2)
> , sr_refunded_cash decimal(7,2)
> , sr_reversed_charge decimal(7,2)
> , sr_store_credit decimal(7,2)
> , sr_net_loss decimal(7,2)
> )
> partitioned by (sr_returned_date_sk bigint)
> stored as orc;
>
> When I modify this code in the class OrcFilters, I can run it successfully:
> /**
> * Get PredicateLeafType which is corresponding to the given DataType.
> */
> def getPredicateLeafType(dataType: DataType): PredicateLeaf.Type = dataType match {
> case BooleanType => PredicateLeaf.Type.BOOLEAN
> case ByteType | ShortType | IntegerType | LongType => PredicateLeaf.Type.LONG
> case FloatType | DoubleType => PredicateLeaf.Type.FLOAT
> case StringType => PredicateLeaf.Type.STRING
> case CharType(length) => PredicateLeaf.Type.STRING
> case VarcharType(length) => PredicateLeaf.Type.STRING
> case DateType => PredicateLeaf.Type.DATE
> case TimestampType => PredicateLeaf.Type.TIMESTAMP
> case _: DecimalType => PredicateLeaf.Type.DECIMAL
> case _ => throw new UnsupportedOperationException(s"DataType: ${dataType.catalogString}")
> }
--
This message was sent by Atlassian Jira
(v8.20.1#820001)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org