You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@parquet.apache.org by sh...@apache.org on 2022/02/25 02:26:02 UTC

[parquet-mr] branch master updated: PARQUET-2120: Make dictionary command handle pages without dictionary (#946)

This is an automated email from the ASF dual-hosted git repository.

shangxinli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 2431c5c  PARQUET-2120: Make dictionary command handle pages without dictionary (#946)
2431c5c is described below

commit 2431c5c1333855c9efd532324dee5b771b0780bf
Author: Willi Raschkowski <wi...@raschkowski.com>
AuthorDate: Fri Feb 25 02:25:54 2022 +0000

    PARQUET-2120: Make dictionary command handle pages without dictionary (#946)
---
 .../cli/commands/ShowDictionaryCommand.java        | 75 ++++++++++++----------
 .../cli/commands/ShowDictionaryCommandTest.java    | 11 ++++
 2 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
index 7a167ed..5d7a609 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
@@ -75,40 +75,12 @@ public class ShowDictionaryCommand extends BaseCommand {
       while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
         DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);
 
-        Dictionary dict = page.getEncoding().initDictionary(descriptor, page);
-
-        console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column, page.getCompressedSize());
-        for (int i = 0; i <= dict.getMaxId(); i += 1) {
-          switch(type.getPrimitiveTypeName()) {
-            case BINARY:
-              if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
-                console.info("{}: {}", String.format("%6d", i),
-                    Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
-              } else {
-                console.info("{}: {}", String.format("%6d", i),
-                    Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
-              }
-              break;
-            case INT32:
-              console.info("{}: {}", String.format("%6d", i),
-                dict.decodeToInt(i));
-              break;
-            case INT64:
-              console.info("{}: {}", String.format("%6d", i),
-                  dict.decodeToLong(i));
-              break;
-            case FLOAT:
-              console.info("{}: {}", String.format("%6d", i),
-                  dict.decodeToFloat(i));
-              break;
-            case DOUBLE:
-              console.info("{}: {}", String.format("%6d", i),
-                  dict.decodeToDouble(i));
-              break;
-            default:
-              throw new IllegalArgumentException(
-                  "Unknown dictionary type: " + type.getPrimitiveTypeName());
-          }
+        if (page != null) {
+          console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column);
+          Dictionary dict = page.getEncoding().initDictionary(descriptor, page);
+          printDictionary(dict, type);
+        } else {
+          console.info("\nRow group {} has no dictionary for \"{}\"", rowGroup, column);
         }
 
         reader.skipNextRowGroup();
@@ -122,6 +94,41 @@ public class ShowDictionaryCommand extends BaseCommand {
     return 0;
   }
 
+  private void printDictionary(Dictionary dict, PrimitiveType type) {
+    for (int i = 0; i <= dict.getMaxId(); i += 1) {
+      switch(type.getPrimitiveTypeName()) {
+        case BINARY:
+          if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
+            console.info("{}: {}", String.format("%6d", i),
+                    Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
+          } else {
+            console.info("{}: {}", String.format("%6d", i),
+                    Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
+          }
+          break;
+        case INT32:
+          console.info("{}: {}", String.format("%6d", i),
+                  dict.decodeToInt(i));
+          break;
+        case INT64:
+          console.info("{}: {}", String.format("%6d", i),
+                  dict.decodeToLong(i));
+          break;
+        case FLOAT:
+          console.info("{}: {}", String.format("%6d", i),
+                  dict.decodeToFloat(i));
+          break;
+        case DOUBLE:
+          console.info("{}: {}", String.format("%6d", i),
+                  dict.decodeToDouble(i));
+          break;
+        default:
+          throw new IllegalArgumentException(
+                  "Unknown dictionary type: " + type.getPrimitiveTypeName());
+      }
+    }
+  }
+
   @Override
   public List<String> getExamples() {
     return Lists.newArrayList(
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ShowDictionaryCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ShowDictionaryCommandTest.java
index 5fe82c9..a4d796c 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ShowDictionaryCommandTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ShowDictionaryCommandTest.java
@@ -36,4 +36,15 @@ public class ShowDictionaryCommandTest extends ParquetFileTest {
     command.setConf(new Configuration());
     Assert.assertEquals(0, command.run());
   }
+
+  @Test
+  public void testShowDirectoryCommandWithoutDictionaryEncoding() throws IOException {
+    File file = parquetFile();
+    ShowDictionaryCommand command = new ShowDictionaryCommand(createLogger());
+    command.targets = Arrays.asList(file.getAbsolutePath());
+    // the 'double_field' column does not have dictionary encoding
+    command.column = DOUBLE_FIELD;
+    command.setConf(new Configuration());
+    Assert.assertEquals(0, command.run());
+  }
 }