You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/10 20:37:11 UTC

[orc] branch main updated: ORC-930: Ignore unsupported JSON x ZSTD combination in bench (#844)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 2c1599e  ORC-930: Ignore unsupported JSON x ZSTD combination in bench (#844)
2c1599e is described below

commit 2c1599ea223cc4e480686e1e7515a00285ba7dd6
Author: Dongjoon Hyun <do...@apache.org>
AuthorDate: Tue Aug 10 13:36:06 2021 -0700

    ORC-930: Ignore unsupported JSON x ZSTD combination in bench (#844)
    
    ### What changes were proposed in this pull request?
    
    This PR aims to skip the unsupported JSON x ZSTD combination in the benchmark tool instead of failing on it.
    
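    ### Why are the changes needed?
    
    The JSON format does not support ZSTD compression, so generating data with `-c zstd` currently aborts with an exception:
    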
    ```
    $ java -jar core/target/orc-benchmarks-core-*-uber.jar generate data -d sales -c zstd -s 1
    Processing sales [avro, json, orc, parquet]
    [WARN ] Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    Exception in thread "main" java.lang.IllegalArgumentException: Unhandled kind ZSTD
    ```
    
    ### How was this patch tested?
    
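    Manually, by running the `generate` and `scan` commands with ZSTD compression and confirming that they complete without the exception.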
    
    ```
    $ java -jar core/target/orc-benchmarks-core-*-uber.jar generate data -d sales -c zstd -s 1
    Processing sales [avro, json, orc, parquet]
    ```
    
    ```
    $ java -jar core/target/orc-benchmarks-core-*-uber.jar scan data -d sales -c zstd
    [WARN ] Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    data/generated/sales/avro.zstd rows: 1 batches: 1
    data/generated/sales/orc.zstd rows: 1 batches: 1
    data/generated/sales/parquet.zstd rows: 1 batches: 1
    ```
---
 .../org/apache/orc/bench/core/convert/GenerateVariants.java  | 12 ++++++++++--
 .../java/org/apache/orc/bench/core/convert/ScanVariants.java |  3 +++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
index eb51627..efa332d 100644
--- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
+++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
@@ -134,6 +134,10 @@ public class GenerateVariants implements OrcBenchmark {
         CompressionKind compressionKind =
             CompressionKind.valueOf(compressList[compress].toUpperCase());
         for(int format=0; format < formatList.length; ++format) {
+          if (compressionKind == CompressionKind.ZSTD && formatList[format].equals("json")) {
+            System.out.println("Ignore JSON format with ZSTD compression case");
+            continue; // JSON doesn't support ZSTD
+          }
           Path outPath = Utilities.getVariant(root, data, formatList[format],
               compressionKind.getExtension());
           writers[compress * formatList.length + format] =
@@ -147,14 +151,18 @@ public class GenerateVariants implements OrcBenchmark {
         VectorizedRowBatch batch = schema.createRowBatch();
         while (reader.nextBatch(batch)) {
           for (BatchWriter writer : writers) {
-            writer.writeBatch(batch);
+            if (writer != null) {
+              writer.writeBatch(batch);
+            }
           }
         }
       }
 
       // Close all the writers
       for (BatchWriter writer : writers) {
-        writer.close();
+        if (writer != null) {
+          writer.close();
+        }
       }
     }
   }
diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java
index 14c570d..05ef932 100644
--- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java
+++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/ScanVariants.java
@@ -80,6 +80,9 @@ public class ScanVariants implements OrcBenchmark {
       for (String compress : compressList) {
         CompressionKind compressKind = CompressionKind.fromExtension(compress);
         for (String format : formatList) {
+          if (compressKind == CompressionKind.ZSTD && format.equals("json")) {
+            continue; // JSON doesn't support ZSTD
+          }
           Path filename = Utilities.getVariant(root, data, format,
               compress);
           BatchReader reader = GenerateVariants.createFileReader(filename,