Posted to commits@spark.apache.org by su...@apache.org on 2022/01/19 17:52:22 UTC
[spark] branch master updated: [SPARK-36879][SQL][FOLLOWUP] Address comments and fix code style
This is an automated email from the ASF dual-hosted git repository.
sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c288b34 [SPARK-36879][SQL][FOLLOWUP] Address comments and fix code style
c288b34 is described below
commit c288b3466158498e8e73279b3a7828ff608e35a7
Author: Parth Chandra <pa...@apache.org>
AuthorDate: Wed Jan 19 09:51:33 2022 -0800
[SPARK-36879][SQL][FOLLOWUP] Address comments and fix code style
### What changes were proposed in this pull request?
Addresses formatting comments that were raised on the previous PR for SPARK-36879 after it was merged: the temporary buffer fields in `VectorizedDeltaBinaryPackedReader` are made `private`, and the indentation of a test block in `ParquetRebaseDatetimeSuite` is fixed.
### Why are the changes needed?
To address review comments on the previous PR. This is a code-style-only cleanup with no behavior change.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
No new tests are needed; the change is style-only and existing unit tests pass.
Closes #35212 from parthchandra/SPARK-36879-PR2.
Authored-by: Parth Chandra <pa...@apache.org>
Signed-off-by: Chao Sun <su...@apple.com>
---
.../parquet/VectorizedDeltaBinaryPackedReader.java | 8 +--
.../parquet/ParquetRebaseDatetimeSuite.scala | 67 +++++++++++-----------
2 files changed, 38 insertions(+), 37 deletions(-)
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java
index 62fb5f8..7b2aac3 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java
@@ -73,10 +73,10 @@ public class VectorizedDeltaBinaryPackedReader extends VectorizedReaderBase {
private ByteBufferInputStream in;
// temporary buffers used by readByte, readShort, readInteger, and readLong
- byte byteVal;
- short shortVal;
- int intVal;
- long longVal;
+ private byte byteVal;
+ private short shortVal;
+ private int intVal;
+ private long longVal;
@Override
public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException {
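For context, these fields are mutable scratch space reused by the per-value read methods (readByte, readShort, readInteger, readLong), which is why they belong at private rather than package visibility. A minimal sketch of that pattern, using a hypothetical reader class rather than Spark's actual decoder:

    // Hypothetical sketch of reusing private scratch fields across read calls,
    // as VectorizedDeltaBinaryPackedReader does; not Spark's actual code.
    class LittleEndianIntReader(bytes: Array[Byte]) {
      private var pos: Int = 0
      // Scratch field reused across calls; an implementation detail, so private.
      private var intVal: Int = 0

      def readInteger(): Int = {
        // Assemble a little-endian 32-bit value from the next four bytes.
        intVal = (bytes(pos) & 0xFF) |
          ((bytes(pos + 1) & 0xFF) << 8) |
          ((bytes(pos + 2) & 0xFF) << 16) |
          ((bytes(pos + 3) & 0xFF) << 24)
        pos += 4
        intVal
      }
    }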
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala
index 49251af..dbf7f54 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala
@@ -143,12 +143,12 @@ abstract class ParquetRebaseDatetimeSuite
val df = Seq.tabulate(N)(rowFunc).toDF("dict", "plain")
.select($"dict".cast(catalystType), $"plain".cast(catalystType))
withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> tsOutputType) {
- checkDefaultLegacyRead(oldPath)
+ checkDefaultLegacyRead(oldPath)
withSQLConf(inWriteConf -> CORRECTED.toString) {
- df.write.mode("overwrite").parquet(path3_x)
+ df.write.mode("overwrite").parquet(path3_x)
}
withSQLConf(inWriteConf -> LEGACY.toString) {
- df.write.parquet(path3_x_rebase)
+ df.write.parquet(path3_x_rebase)
}
}
// For Parquet files written by Spark 3.0, we know the writer info and don't need the
@@ -243,40 +243,41 @@ abstract class ParquetRebaseDatetimeSuite
SQLConf.PARQUET_INT96_REBASE_MODE_IN_READ.key
)
).foreach { case (outType, tsStr, nonRebased, inWriteConf, inReadConf) =>
- // Ignore the default JVM time zone and use the session time zone instead of it in rebasing.
- DateTimeTestUtils.withDefaultTimeZone(DateTimeTestUtils.JST) {
- withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> DateTimeTestUtils.LA.getId) {
- withClue(s"output type $outType") {
- withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outType) {
- withTempPath { dir =>
- val path = dir.getAbsolutePath
- withSQLConf(inWriteConf -> LEGACY.toString) {
- Seq.tabulate(N)(_ => tsStr).toDF("tsS")
- .select($"tsS".cast("timestamp").as("ts"))
- .repartition(1)
- .write
- .option("parquet.enable.dictionary", dictionaryEncoding)
- .parquet(path)
- }
+ // Ignore the default JVM time zone and use the session time zone instead of
+ // it in rebasing.
+ DateTimeTestUtils.withDefaultTimeZone(DateTimeTestUtils.JST) {
+ withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> DateTimeTestUtils.LA.getId) {
+ withClue(s"output type $outType") {
+ withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outType) {
+ withTempPath { dir =>
+ val path = dir.getAbsolutePath
+ withSQLConf(inWriteConf -> LEGACY.toString) {
+ Seq.tabulate(N)(_ => tsStr).toDF("tsS")
+ .select($"tsS".cast("timestamp").as("ts"))
+ .repartition(1)
+ .write
+ .option("parquet.enable.dictionary", dictionaryEncoding)
+ .parquet(path)
+ }
- withAllParquetReaders {
- // The file metadata indicates if it needs rebase or not, so we can always get
- // the correct result regardless of the "rebase mode" config.
- runInMode(inReadConf, Seq(LEGACY, CORRECTED, EXCEPTION)) { options =>
- checkAnswer(
- spark.read.options(options).parquet(path).select($"ts".cast("string")),
- Seq.tabulate(N)(_ => Row(tsStr)))
- }
+ withAllParquetReaders {
+ // The file metadata indicates if it needs rebase or not, so we can always get
+ // the correct result regardless of the "rebase mode" config.
+ runInMode(inReadConf, Seq(LEGACY, CORRECTED, EXCEPTION)) { options =>
+ checkAnswer(
+ spark.read.options(options).parquet(path).select($"ts".cast("string")),
+ Seq.tabulate(N)(_ => Row(tsStr)))
+ }
- // Force to not rebase to prove the written datetime values are rebased
- // and we will get wrong result if we don't rebase while reading.
- withSQLConf("spark.test.forceNoRebase" -> "true") {
- checkAnswer(
- spark.read.parquet(path).select($"ts".cast("string")),
- Seq.tabulate(N)(_ => Row(nonRebased)))
+ // Force to not rebase to prove the written datetime values are rebased
+ // and we will get wrong result if we don't rebase while reading.
+ withSQLConf("spark.test.forceNoRebase" -> "true") {
+ checkAnswer(
+ spark.read.parquet(path).select($"ts".cast("string")),
+ Seq.tabulate(N)(_ => Row(nonRebased)))
+ }
}
}
- }
}
}
}
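For context on what the reformatted test exercises: Spark rebases ancient datetimes between the Proleptic Gregorian calendar and the legacy hybrid Julian/Gregorian calendar, controlled by the rebase-mode configs the test sets (SQLConf.PARQUET_REBASE_MODE_IN_WRITE and friends). A minimal sketch of the write/read round trip; the path, session setup, and timestamp literal are illustrative assumptions, not part of this patch:

    import org.apache.spark.sql.SparkSession

    object RebaseRoundTrip {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[1]").getOrCreate()
        import spark.implicits._

        val path = "/tmp/ts-legacy-rebase" // illustrative path
        // Write an ancient timestamp with LEGACY rebase mode: the stored value
        // is shifted to the hybrid Julian/Gregorian calendar.
        spark.conf.set("spark.sql.parquet.datetimeRebaseModeInWrite", "LEGACY")
        Seq("1001-01-01 01:02:03").toDF("tsS")
          .select($"tsS".cast("timestamp").as("ts"))
          .write.mode("overwrite").parquet(path)

        // The file metadata records that the values were rebased, so the reader
        // recovers the original timestamp regardless of the read-side mode
        // (LEGACY, CORRECTED, or EXCEPTION).
        spark.read.parquet(path).select($"ts".cast("string")).show(false)

        spark.stop()
      }
    }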