You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@spark.apache.org by Suryansh Agnihotri <sa...@gmail.com> on 2023/10/30 12:54:29 UTC
Spark 3.2.1 parquet read error
Hello spark-dev
I have loaded tpcds data in parquet format using spark *3.0.2* and while
reading it from spark *3.2.1*, my query is failing with the error below.
Later I set spark.sql.parquet.enableVectorizedReader=false, but it
resulted in a different error. I am also providing output of parquet-tools
below.
spark-sql> select * from store_sales limit 100;
org.apache.spark.sql.execution.QueryExecutionException: Parquet column
cannot be converted in file
obj_store_location/store_sales/ss_sold_date_sk=2451121/part-00440-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet.
Column: [ss_sold_time_sk], Expected: bigint, Found: INT32
at org.apache.spark.sql.errors.QueryExecutionErrors$.unsupportedSchemaColumnConvertError(QueryExecutionErrors.scala:570)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:195)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:522)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown
Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:349)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
at org.apache.spark.sql.execution.datasources.parquet.ParquetVectorUpdaterFactory.constructConvertNotSupportedException(ParquetVectorUpdaterFactory.java:1104)
at org.apache.spark.sql.execution.datasources.parquet.ParquetVectorUpdaterFactory.getUpdater(ParquetVectorUpdaterFactory.java:181)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:161)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:298)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:196)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:191)
... 21 more
23/10/30 10:24:48 WARN TaskSetManager: Lost task 0.2 in stage 1.0
(TID 1826) org.apache.spark.sql.execution.QueryExecutionException:
Parquet column cannot be converted in file
objstore_location/store_sales/ss_sold_date_sk=2451121/part-00440-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet.
Column: [ss_sold_time_sk], Expected: bigint, Found: INT32
at org.apache.spark.sql.errors.QueryExecutionErrors$.unsupportedSchemaColumnConvertError(QueryExecutionErrors.scala:570)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:195)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:522)
set spark.sql.parquet.enableVectorizedReader=false;spark.sql.parquet.enableVectorizedReader false
Time taken: 0.023 seconds, Fetched 1 row(s)
23/10/30 12:08:50 INFO SparkSQLCLIDriver: Time taken: 0.023 seconds,
Fetched 1 row(s)
spark-sql> select * from store_sales limit 10;
23/10/30 12:09:06 WARN TaskSetManager: Lost task 0.0 in stage 6.0 (TID
1847) (trinoprwn5.subnetpoc1.vcn12231050.oraclevcn.com executor 11):
org.apache.spark.sql.execution.QueryExecutionException: Encounter
error while reading parquet files. One possible cause: Parquet column
cannot be converted in the corresponding files. Details:
at org.apache.spark.sql.errors.QueryExecutionErrors$.cannotReadParquetFilesError(QueryExecutionErrors.scala:577)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:200)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:349)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
Caused by: org.apache.parquet.io.ParquetDecodingException: Can not
read value at 1 in block 0 in file
objstorelocation/store_sales/ss_sold_date_sk=2451121/part-00440-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet
at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:254)
at org.apache.parquet.hadoop.ParquetRecordReader.nextKeyValue(ParquetRecordReader.java:207)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator$$anon$1.hasNext(RecordReaderIterator.scala:61)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:191)
... 17 more
Caused by: java.lang.ClassCastException:
org.apache.spark.sql.catalyst.expressions.MutableLong cannot be cast
to org.apache.spark.sql.catalyst.expressions.MutableInt
at org.apache.spark.sql.catalyst.expressions.SpecificInternalRow.setInt(SpecificInternalRow.scala:254)
at org.apache.spark.sql.execution.datasources.parquet.ParquetRowConverter$RowUpdater.setInt(ParquetRowConverter.scala:179)
at org.apache.spark.sql.execution.datasources.parquet.ParquetPrimitiveConverter.addInt(ParquetRowConverter.scala:89)
at org.apache.parquet.column.impl.ColumnReaderBase$2$3.writeValue(ColumnReaderBase.java:297)
at org.apache.parquet.column.impl.ColumnReaderBase.writeCurrentValueToConverter(ColumnReaderBase.java:440)
at org.apache.parquet.column.impl.ColumnReaderImpl.writeCurrentValueToConverter(ColumnReaderImpl.java:30)
at org.apache.parquet.io.RecordReaderImplementation.read(RecordReaderImplementation.java:406)
at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:229)
... 22 more
java -cp /usr/lib/hadoop/hadoop-common.jar:/usr/lib/hadoop/lib/*:/home/opc/parquet_file/parquet-tools-1.9.0.jar
org.apache.parquet.tools.Main meta
part-00042-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet
file:
file:/home/opc/parquet_file/part-00042-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet
creator: parquet-mr version 1.10.1 (build
a89df8f9932b6ef6633d06069e50c9b7970bebd1)
extra: org.apache.spark.version = 3.0.2
extra: org.apache.spark.sql.parquet.row.metadata =
{"type":"struct","fields":[{"name":"ss_sold_time_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_item_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_customer_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_cdemo_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_hdemo_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_addr_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_store_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_promo_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_ticket_number","type":"long","nullable":true,"metadata":{}},{"name":"ss_quantity","type":"integer","nullable":true,"metadata":{}},{"name":"ss_wholesale_cost","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_list_price","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_sales_price","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_discount_amt","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_sales_price","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_wholesale_cost","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_list_price","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_tax","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_coupon_amt","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_net_paid","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_net_paid_inc_tax","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_net_profit","type":"decimal(7,2)","nullable":true,"metadata":{}}]}
file schema: spark_schema
--------------------------------------------------------------------------------
ss_sold_time_sk: OPTIONAL INT32 R:0 D:1
ss_item_sk: OPTIONAL INT32 R:0 D:1
ss_customer_sk: OPTIONAL INT32 R:0 D:1
ss_cdemo_sk: OPTIONAL INT32 R:0 D:1
ss_hdemo_sk: OPTIONAL INT32 R:0 D:1
ss_addr_sk: OPTIONAL INT32 R:0 D:1
ss_store_sk: OPTIONAL INT32 R:0 D:1
ss_promo_sk: OPTIONAL INT32 R:0 D:1
ss_ticket_number: OPTIONAL INT64 R:0 D:1
ss_quantity: OPTIONAL INT32 R:0 D:1
ss_wholesale_cost: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_list_price: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_sales_price: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_ext_discount_amt: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_ext_sales_price: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_ext_wholesale_cost: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_ext_list_price: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_ext_tax: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_coupon_amt: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_net_paid: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_net_paid_inc_tax: OPTIONAL INT32 O:DECIMAL R:0 D:1
ss_net_profit: OPTIONAL INT32 O:DECIMAL R:0 D:1
*Please let me know if this is a known issue or fixed in later versions.*
Thanks
Re: Spark 3.2.1 parquet read error
Posted by Mich Talebzadeh <mi...@gmail.com>.
Hi,
The error you see when reading Parquet data in Spark 3.2.1 is due to a
schema mismatch between the Parquet file and the Spark table schema. The
Parquet file stores the ss_sold_time_sk column as INT32, while the Spark
schema expects it to be BIGINT. This schema mismatch is causing the error.
This is the likely cause.
Parquet column cannot be converted in file
obj_store_location/store_sales/ss_sold_date_sk=2451121/part-00440-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet.
Column: [ss_sold_time_sk], Expected: bigint, Found: INT32
Does this read work with Spark 3.0.2?
HTH
Mich Talebzadeh,
Distinguished Technologist, Solutions Architect & Engineer
London
United Kingdom
view my Linkedin profile
<https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
https://en.everybodywiki.com/Mich_Talebzadeh
Disclaimer: Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.
Mich Talebzadeh,
Distinguished Technologist, Solutions Architect & Engineer
London
United Kingdom
view my Linkedin profile
<https://www.linkedin.com/in/mich-talebzadeh-ph-d-5205b2/>
https://en.everybodywiki.com/Mich_Talebzadeh
*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.
On Mon, 30 Oct 2023 at 14:11, Suryansh Agnihotri <sa...@gmail.com>
wrote:
> Hello spark-dev
> I have loaded tpcds data in parquet format using spark *3.0.2* and while
> reading it from spark *3.2.1*, my query is failing with the error below.
>
> Later I set spark.sql.parquet.enableVectorizedReader=false, but it
> resulted in a different error. I am also providing output of parquet-tools
> below.
>
> spark-sql> select * from store_sales limit 100;
>
> org.apache.spark.sql.execution.QueryExecutionException: Parquet column cannot be converted in file obj_store_location/store_sales/ss_sold_date_sk=2451121/part-00440-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet. Column: [ss_sold_time_sk], Expected: bigint, Found: INT32
> at org.apache.spark.sql.errors.QueryExecutionErrors$.unsupportedSchemaColumnConvertError(QueryExecutionErrors.scala:570)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:195)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
> at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:522)
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
> at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
> at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759)
> at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:349)
> at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
> at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
> at org.apache.spark.sql.execution.datasources.parquet.ParquetVectorUpdaterFactory.constructConvertNotSupportedException(ParquetVectorUpdaterFactory.java:1104)
> at org.apache.spark.sql.execution.datasources.parquet.ParquetVectorUpdaterFactory.getUpdater(ParquetVectorUpdaterFactory.java:181)
> at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:161)
> at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:298)
> at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:196)
> at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:191)
> ... 21 more
>
> 23/10/30 10:24:48 WARN TaskSetManager: Lost task 0.2 in stage 1.0 (TID 1826) org.apache.spark.sql.execution.QueryExecutionException: Parquet column cannot be converted in file objstore_location/store_sales/ss_sold_date_sk=2451121/part-00440-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet. Column: [ss_sold_time_sk], Expected: bigint, Found: INT32
> at org.apache.spark.sql.errors.QueryExecutionErrors$.unsupportedSchemaColumnConvertError(QueryExecutionErrors.scala:570)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:195)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
> at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:522)
>
>
>
>
> set spark.sql.parquet.enableVectorizedReader=false;spark.sql.parquet.enableVectorizedReader false
> Time taken: 0.023 seconds, Fetched 1 row(s)
> 23/10/30 12:08:50 INFO SparkSQLCLIDriver: Time taken: 0.023 seconds, Fetched 1 row(s)
> spark-sql> select * from store_sales limit 10;
> 23/10/30 12:09:06 WARN TaskSetManager: Lost task 0.0 in stage 6.0 (TID 1847) (trinoprwn5.subnetpoc1.vcn12231050.oraclevcn.com executor 11): org.apache.spark.sql.execution.QueryExecutionException: Encounter error while reading parquet files. One possible cause: Parquet column cannot be converted in the corresponding files. Details:
> at org.apache.spark.sql.errors.QueryExecutionErrors$.cannotReadParquetFilesError(QueryExecutionErrors.scala:577)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:200)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
> at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
> at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:349)
> at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
> at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 1 in block 0 in file objstorelocation/store_sales/ss_sold_date_sk=2451121/part-00440-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet
> at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:254)
> at org.apache.parquet.hadoop.ParquetRecordReader.nextKeyValue(ParquetRecordReader.java:207)
> at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
> at org.apache.spark.sql.execution.datasources.RecordReaderIterator$$anon$1.hasNext(RecordReaderIterator.scala:61)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:104)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:191)
> ... 17 more
> Caused by: java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableLong cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableInt
> at org.apache.spark.sql.catalyst.expressions.SpecificInternalRow.setInt(SpecificInternalRow.scala:254)
> at org.apache.spark.sql.execution.datasources.parquet.ParquetRowConverter$RowUpdater.setInt(ParquetRowConverter.scala:179)
> at org.apache.spark.sql.execution.datasources.parquet.ParquetPrimitiveConverter.addInt(ParquetRowConverter.scala:89)
> at org.apache.parquet.column.impl.ColumnReaderBase$2$3.writeValue(ColumnReaderBase.java:297)
> at org.apache.parquet.column.impl.ColumnReaderBase.writeCurrentValueToConverter(ColumnReaderBase.java:440)
> at org.apache.parquet.column.impl.ColumnReaderImpl.writeCurrentValueToConverter(ColumnReaderImpl.java:30)
> at org.apache.parquet.io.RecordReaderImplementation.read(RecordReaderImplementation.java:406)
> at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:229)
> ... 22 more
>
> java -cp /usr/lib/hadoop/hadoop-common.jar:/usr/lib/hadoop/lib/*:/home/opc/parquet_file/parquet-tools-1.9.0.jar org.apache.parquet.tools.Main meta part-00042-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet
> file: file:/home/opc/parquet_file/part-00042-eac89ce9-041a-4254-b90a-6aceb3c8e6c4.c000.snappy.parquet
> creator: parquet-mr version 1.10.1 (build a89df8f9932b6ef6633d06069e50c9b7970bebd1)
> extra: org.apache.spark.version = 3.0.2
> extra: org.apache.spark.sql.parquet.row.metadata = {"type":"struct","fields":[{"name":"ss_sold_time_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_item_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_customer_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_cdemo_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_hdemo_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_addr_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_store_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_promo_sk","type":"integer","nullable":true,"metadata":{}},{"name":"ss_ticket_number","type":"long","nullable":true,"metadata":{}},{"name":"ss_quantity","type":"integer","nullable":true,"metadata":{}},{"name":"ss_wholesale_cost","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_list_price","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_sales_price","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_discount_amt","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_sales_price","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_wholesale_cost","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_list_price","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_ext_tax","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_coupon_amt","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_net_paid","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_net_paid_inc_tax","type":"decimal(7,2)","nullable":true,"metadata":{}},{"name":"ss_net_profit","type":"decimal(7,2)","nullable":true,"metadata":{}}]}
>
> file schema: spark_schema
> --------------------------------------------------------------------------------
> ss_sold_time_sk: OPTIONAL INT32 R:0 D:1
> ss_item_sk: OPTIONAL INT32 R:0 D:1
> ss_customer_sk: OPTIONAL INT32 R:0 D:1
> ss_cdemo_sk: OPTIONAL INT32 R:0 D:1
> ss_hdemo_sk: OPTIONAL INT32 R:0 D:1
> ss_addr_sk: OPTIONAL INT32 R:0 D:1
> ss_store_sk: OPTIONAL INT32 R:0 D:1
> ss_promo_sk: OPTIONAL INT32 R:0 D:1
> ss_ticket_number: OPTIONAL INT64 R:0 D:1
> ss_quantity: OPTIONAL INT32 R:0 D:1
> ss_wholesale_cost: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_list_price: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_sales_price: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_ext_discount_amt: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_ext_sales_price: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_ext_wholesale_cost: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_ext_list_price: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_ext_tax: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_coupon_amt: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_net_paid: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_net_paid_inc_tax: OPTIONAL INT32 O:DECIMAL R:0 D:1
> ss_net_profit: OPTIONAL INT32 O:DECIMAL R:0 D:1
>
> *Please let me know if this is a known issue or fixed in later versions.*
>
> Thanks
>