Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2021/03/30 16:44:19 UTC

[GitHub] [spark] sunchao commented on a change in pull request #31776: [SPARK-34661][SQL] Clean up `OriginalType` and `DecimalMetadata` usage in Parquet related code

sunchao commented on a change in pull request #31776:
URL: https://github.com/apache/spark/pull/31776#discussion_r604249720



##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala
##########
@@ -105,23 +109,29 @@ class ParquetFilters(
       fieldType: ParquetSchemaType)
 
   private case class ParquetSchemaType(
-      originalType: OriginalType,
+      logicalTypeAnnotation: LogicalTypeAnnotation,
       primitiveTypeName: PrimitiveTypeName,
       length: Int,
-      decimalMetadata: DecimalMetadata)
+      decimalLogicalType: DecimalLogicalTypeAnnotation)

Review comment:
       this may no longer be needed since we can just use `logicalTypeAnnotation`?
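
       A minimal sketch of what the suggestion might look like (the helper name is hypothetical; only the parquet-mr `LogicalTypeAnnotation` API is assumed). Since `DecimalLogicalTypeAnnotation` is a subtype of `LogicalTypeAnnotation`, the decimal case can be recovered by pattern matching instead of carrying a separate field:

           import org.apache.parquet.schema.LogicalTypeAnnotation
           import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation

           // Hypothetical helper: derive the decimal annotation from the single
           // logicalTypeAnnotation field rather than storing decimalLogicalType too.
           private def decimalAnnotation(
               annotation: LogicalTypeAnnotation): Option[DecimalLogicalTypeAnnotation] =
             annotation match {
               case d: DecimalLogicalTypeAnnotation => Some(d)
               case _ => None
             }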

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala
##########
@@ -105,23 +109,29 @@ class ParquetFilters(
       fieldType: ParquetSchemaType)
 
   private case class ParquetSchemaType(
-      originalType: OriginalType,
+      logicalTypeAnnotation: LogicalTypeAnnotation,
       primitiveTypeName: PrimitiveTypeName,
       length: Int,
-      decimalMetadata: DecimalMetadata)
+      decimalLogicalType: DecimalLogicalTypeAnnotation)
 
   private val ParquetBooleanType = ParquetSchemaType(null, BOOLEAN, 0, null)
-  private val ParquetByteType = ParquetSchemaType(INT_8, INT32, 0, null)
-  private val ParquetShortType = ParquetSchemaType(INT_16, INT32, 0, null)
+  private val ParquetByteType =
+    ParquetSchemaType(LogicalTypeAnnotation.intType(8, true), INT32, 0, null)
+  private val ParquetShortType =
+    ParquetSchemaType(LogicalTypeAnnotation.intType(16, true), INT32, 0, null)
   private val ParquetIntegerType = ParquetSchemaType(null, INT32, 0, null)
   private val ParquetLongType = ParquetSchemaType(null, INT64, 0, null)
   private val ParquetFloatType = ParquetSchemaType(null, FLOAT, 0, null)
   private val ParquetDoubleType = ParquetSchemaType(null, DOUBLE, 0, null)
-  private val ParquetStringType = ParquetSchemaType(UTF8, BINARY, 0, null)
+  private val ParquetStringType =
+    ParquetSchemaType(LogicalTypeAnnotation.stringType(), BINARY, 0, null)
   private val ParquetBinaryType = ParquetSchemaType(null, BINARY, 0, null)
-  private val ParquetDateType = ParquetSchemaType(DATE, INT32, 0, null)
-  private val ParquetTimestampMicrosType = ParquetSchemaType(TIMESTAMP_MICROS, INT64, 0, null)
-  private val ParquetTimestampMillisType = ParquetSchemaType(TIMESTAMP_MILLIS, INT64, 0, null)
+  private val ParquetDateType =
+    ParquetSchemaType(LogicalTypeAnnotation.dateType(), INT32, 0, null)
+  private val ParquetTimestampMicrosType =
+    ParquetSchemaType(LogicalTypeAnnotation.timestampType(true, TimeUnit.MICROS), INT64, 0, null)
+  private val ParquetTimestampMillisType =
+    ParquetSchemaType(LogicalTypeAnnotation.timestampType(true, TimeUnit.MILLIS), INT64, 0, null)

Review comment:
       I think there is also `TimeUnit.NANOS`, but it seems Spark doesn't support it yet.
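
       For reference, a sketch of the three units the parquet-mr `TimeUnit` enum defines (only MICROS and MILLIS are mapped in the diff above; `isAdjustedToUTC = true` matches those constants):

           import org.apache.parquet.schema.LogicalTypeAnnotation
           import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit

           val micros = LogicalTypeAnnotation.timestampType(true, TimeUnit.MICROS)
           val millis = LogicalTypeAnnotation.timestampType(true, TimeUnit.MILLIS)
           // NANOS exists in parquet-mr but, per the comment, Spark doesn't map it yet.
           val nanos = LogicalTypeAnnotation.timestampType(true, TimeUnit.NANOS)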

##########
File path: sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java
##########
@@ -894,4 +904,15 @@ private void readPageV2(DataPageV2 page) throws IOException {
       throw new IOException("could not read page " + page + " in col " + descriptor, e);
     }
   }
+
+  private boolean isTimestampTypeMatched(TimeUnit unit) {

Review comment:
       nit: these can be static methods
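
       A sketch of the nit under stated assumptions: the method name comes from the diff, but the `annotation` parameter is hypothetical here; a static variant would take the column's annotation explicitly instead of reading instance state (Java, matching the file under review):

           import org.apache.parquet.schema.LogicalTypeAnnotation;
           import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation;
           import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit;

           // Hypothetical static form: no dependency on the enclosing reader instance.
           private static boolean isTimestampTypeMatched(
               LogicalTypeAnnotation annotation, TimeUnit unit) {
             return annotation instanceof TimestampLogicalTypeAnnotation &&
                 ((TimestampLogicalTypeAnnotation) annotation).getUnit() == unit;
           }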



