You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2016/12/07 19:07:10 UTC

parquet-mr git commit: PARQUET-321: Default maximum block padding to 8MB.

Repository: parquet-mr
Updated Branches:
  refs/heads/master 4fd34e651 -> 98c27699c


PARQUET-321: Default maximum block padding to 8MB.

This applies rdblue's change from the original pull request to the newest code.

Original pull request: https://github.com/apache/parquet-mr/pull/232/

Author: Zoltan Ivanfi <zi...@cloudera.com>

Closes #391 from zicl/master and squashes the following commits:

b1c5c1d [Zoltan Ivanfi] PARQUET-321: Default maximum block padding to 8MB.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/98c27699
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/98c27699
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/98c27699

Branch: refs/heads/master
Commit: 98c27699cbcf65c3d9d655ecbcd67adcd8b45b05
Parents: 4fd34e6
Author: Zoltan Ivanfi <zi...@cloudera.com>
Authored: Wed Dec 7 11:07:03 2016 -0800
Committer: Ryan Blue <bl...@apache.org>
Committed: Wed Dec 7 11:07:03 2016 -0800

----------------------------------------------------------------------
 .../java/org/apache/parquet/hadoop/ParquetOutputFormat.java    | 6 +-----
 .../src/main/java/org/apache/parquet/hadoop/ParquetWriter.java | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/98c27699/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
index bd20360..78af765 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
@@ -145,9 +145,6 @@ public class ParquetOutputFormat<T> extends FileOutputFormat<Void, T> {
   public static final String MAX_ROW_COUNT_FOR_PAGE_SIZE_CHECK = "parquet.page.size.row.check.max";
   public static final String ESTIMATE_PAGE_SIZE_CHECK = "parquet.page.size.check.estimate";
 
-  // default to no padding for now
-  private static final int DEFAULT_MAX_PADDING_SIZE = 0;
-
   public static JobSummaryLevel getJobSummaryLevel(Configuration conf) {
     String level = conf.get(JOB_SUMMARY_LEVEL);
     String deprecatedFlag = conf.get(ENABLE_JOB_SUMMARY);
@@ -313,8 +310,7 @@ public class ParquetOutputFormat<T> extends FileOutputFormat<Void, T> {
   }
 
   private static int getMaxPaddingSize(Configuration conf) {
-    // default to no padding, 0% of the row group size
-    return conf.getInt(MAX_PADDING_BYTES, DEFAULT_MAX_PADDING_SIZE);
+    return conf.getInt(MAX_PADDING_BYTES, ParquetWriter.MAX_PADDING_SIZE_DEFAULT);
   }
 
   private WriteSupport<T> writeSupport;

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/98c27699/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index 58cbe95..9512b93 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -49,7 +49,7 @@ public class ParquetWriter<T> implements Closeable {
   public static final String OBJECT_MODEL_NAME_PROP = "writer.model.name";
 
   // max size (bytes) to write as padding and the min size of a row group
-  public static final int MAX_PADDING_SIZE_DEFAULT = 0;
+  public static final int MAX_PADDING_SIZE_DEFAULT = 8 * 1024 * 1024; // 8MB
 
   private final InternalParquetRecordWriter<T> writer;
   private final CodecFactory codecFactory;