You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Yuming Wang (Jira)" <ji...@apache.org> on 2022/03/30 08:47:00 UTC

[jira] [Commented] (SPARK-38695) ORC can not support the dataType, such as char or varchar

    [ https://issues.apache.org/jira/browse/SPARK-38695?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17514547#comment-17514547 ] 

Yuming Wang commented on SPARK-38695:
-------------------------------------

How to reproduce this issue?

> ORC can not support the dataType, such as char or varchar
> ---------------------------------------------------------
>
>                 Key: SPARK-38695
>                 URL: https://issues.apache.org/jira/browse/SPARK-38695
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.1.2, 3.2.1
>            Reporter: jacky
>            Priority: Major
>
> When testing Spark performance with TPCDS and running some SQL queries, such as q1, I found this error:
> java.lang.UnsupportedOperationException: DataType: char(2)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.getPredicateLeafType(OrcFilters.scala:150)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.getType$1(OrcFilters.scala:222)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.buildLeafSearchArgument(OrcFilters.scala:266)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.convertibleFiltersHelper$1(OrcFilters.scala:132)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.$anonfun$convertibleFilters$4(OrcFilters.scala:135)
>         at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
>         at scala.collection.immutable.List.foreach(List.scala:392)
>         at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
>         at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
>         at scala.collection.immutable.List.flatMap(List.scala:355)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.convertibleFilters(OrcFilters.scala:134)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.createFilter(OrcFilters.scala:73)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat.$anonfun$buildReaderWithPartitionValues$4(OrcFileFormat.scala:189)
>         at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat.$anonfun$buildReaderWithPartitionValues$4$adapted(OrcFileFormat.scala
>  
> I used the following SQL to create the tables, for example:
> create table customer
> stored as orc
> as select * from tpdc_text.customer
> CLUSTER BY c_customer_sk
>  
> create table store
> stored as orc
> as select * from  tpdc_text.store
> CLUSTER BY s_store_sk
>  
> create table date_dim
> stored as orc
> as select * from tpdc_text.date_dim;
>  
> create table store_returns
> (
>       sr_return_time_sk bigint
> ,     sr_item_sk bigint
> ,     sr_customer_sk bigint
> ,     sr_cdemo_sk bigint
> ,     sr_hdemo_sk bigint
> ,     sr_addr_sk bigint
> ,     sr_store_sk bigint
> ,     sr_reason_sk bigint
> ,     sr_ticket_number bigint
> ,     sr_return_quantity int
> ,     sr_return_amt decimal(7,2)
> ,     sr_return_tax decimal(7,2)
> ,     sr_return_amt_inc_tax decimal(7,2)
> ,     sr_fee decimal(7,2) 
> ,     sr_return_ship_cost decimal(7,2)
> ,     sr_refunded_cash decimal(7,2)
> ,     sr_reversed_charge decimal(7,2)
> ,     sr_store_credit decimal(7,2)
> ,     sr_net_loss decimal(7,2)
> )
> partitioned by (sr_returned_date_sk bigint)
> stored as orc;
>  
> When I modified this code in the class OrcFilters, I could run it successfully:
> /**
>  * Get PredicateLeafType which is corresponding to the given DataType.
> */
> def getPredicateLeafType(dataType: DataType): PredicateLeaf.Type = dataType match {
> case BooleanType => PredicateLeaf.Type.BOOLEAN
> case ByteType | ShortType | IntegerType | LongType => PredicateLeaf.Type.LONG
> case FloatType | DoubleType => PredicateLeaf.Type.FLOAT
> case StringType => PredicateLeaf.Type.STRING
> {color:#ff0000}case CharType(length) => PredicateLeaf.Type.STRING{color}
> {color:#ff0000} case VarcharType(length) => PredicateLeaf.Type.STRING{color}
> case DateType => PredicateLeaf.Type.DATE
> case TimestampType => PredicateLeaf.Type.TIMESTAMP
> case _: DecimalType => PredicateLeaf.Type.DECIMAL
> case _ => throw new UnsupportedOperationException(s"DataType: ${dataType.catalogString}")
> }



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org