You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2022/01/31 03:55:50 UTC

[orc] branch main updated: ORC-1109: Use `zstd` instead of `none` in the default compress option (#1035)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new bcfa71e  ORC-1109: Use `zstd` instead of `none` in the default compress option (#1035)
bcfa71e is described below

commit bcfa71e2c7fc8894b9075e021fcf00b976b0257e
Author: William Hyun <wi...@apache.org>
AuthorDate: Sun Jan 30 19:55:44 2022 -0800

    ORC-1109: Use `zstd` instead of `none` in the default compress option (#1035)
    
    ### What changes were proposed in this pull request?
    
    This PR aims to replace `none` with `zstd` in the default value of the `compress` option used by `GenerateVariants` and `ScanVariants`.
    
    ### Why are the changes needed?
    
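    This will reduce the hardware requirements (notably disk space) for running the benchmarks, because the large uncompressed `none` variants are no longer generated by default.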
    
    ```
    $ du -h * | sort -nr
    112G	github
     67G	sales
     21G	taxi
    ```
    ```
    $ du -h */*none | sort -nr
    663M	taxi/parquet.none
     28G	github/json.none
     22G	sales/json.none
     18G	github/avro.none
     14G	github/parquet.none
     12G	github/orc.none
     10G	taxi/json.none
    4.9G	sales/avro.none
    4.2G	sales/parquet.none
    2.9G	sales/orc.none
    2.0G	taxi/avro.none
    1.2G	taxi/orc.none
    ```
    
    ### How was this patch tested?
    Manually generated the benchmark data.
---
 .../src/java/org/apache/orc/bench/core/convert/GenerateVariants.java    | 2 +-
 .../core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
index 0fc3683..fc24636 100644
--- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
+++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
@@ -113,7 +113,7 @@ public class GenerateVariants implements OrcBenchmark {
   public void run(String[] args) throws Exception {
     CommandLine cli = parseCommandLine(args);
     String[] compressList =
-        cli.getOptionValue("compress", "none,snappy,zlib").split(",");
+        cli.getOptionValue("compress", "snappy,zlib,zstd").split(",");
     String[] dataList =
         cli.getOptionValue("data", "taxi,sales,github").split(",");
     String[] formatList =
diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java
index 05ef932..1d094a5 100644
--- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java
+++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java
@@ -67,7 +67,7 @@ public class ScanVariants implements OrcBenchmark {
   public void run(String[] args) throws Exception {
     CommandLine cli = parseCommandLine(args);
     String[] compressList =
-        cli.getOptionValue("compress", "none,snappy,gz").split(",");
+        cli.getOptionValue("compress", "snappy,gz,zstd").split(",");
     String[] dataList =
         cli.getOptionValue("data", "taxi,sales,github").split(",");
     String[] formatList =