You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by sh...@apache.org on 2022/02/25 02:26:02 UTC
[parquet-mr] branch master updated: PARQUET-2120: Make dictionary command handle pages without dictionary (#946)
This is an automated email from the ASF dual-hosted git repository.
shangxinli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 2431c5c PARQUET-2120: Make dictionary command handle pages without dictionary (#946)
2431c5c is described below
commit 2431c5c1333855c9efd532324dee5b771b0780bf
Author: Willi Raschkowski <wi...@raschkowski.com>
AuthorDate: Fri Feb 25 02:25:54 2022 +0000
PARQUET-2120: Make dictionary command handle pages without dictionary (#946)
---
.../cli/commands/ShowDictionaryCommand.java | 75 ++++++++++++----------
.../cli/commands/ShowDictionaryCommandTest.java | 11 ++++
2 files changed, 52 insertions(+), 34 deletions(-)
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
index 7a167ed..5d7a609 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
@@ -75,40 +75,12 @@ public class ShowDictionaryCommand extends BaseCommand {
while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);
- Dictionary dict = page.getEncoding().initDictionary(descriptor, page);
-
- console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column, page.getCompressedSize());
- for (int i = 0; i <= dict.getMaxId(); i += 1) {
- switch(type.getPrimitiveTypeName()) {
- case BINARY:
- if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
- console.info("{}: {}", String.format("%6d", i),
- Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
- } else {
- console.info("{}: {}", String.format("%6d", i),
- Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
- }
- break;
- case INT32:
- console.info("{}: {}", String.format("%6d", i),
- dict.decodeToInt(i));
- break;
- case INT64:
- console.info("{}: {}", String.format("%6d", i),
- dict.decodeToLong(i));
- break;
- case FLOAT:
- console.info("{}: {}", String.format("%6d", i),
- dict.decodeToFloat(i));
- break;
- case DOUBLE:
- console.info("{}: {}", String.format("%6d", i),
- dict.decodeToDouble(i));
- break;
- default:
- throw new IllegalArgumentException(
- "Unknown dictionary type: " + type.getPrimitiveTypeName());
- }
+ if (page != null) {
+ console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column);
+ Dictionary dict = page.getEncoding().initDictionary(descriptor, page);
+ printDictionary(dict, type);
+ } else {
+ console.info("\nRow group {} has no dictionary for \"{}\"", rowGroup, column);
}
reader.skipNextRowGroup();
@@ -122,6 +94,41 @@ public class ShowDictionaryCommand extends BaseCommand {
return 0;
}
+ private void printDictionary(Dictionary dict, PrimitiveType type) {
+ for (int i = 0; i <= dict.getMaxId(); i += 1) {
+ switch(type.getPrimitiveTypeName()) {
+ case BINARY:
+ if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
+ console.info("{}: {}", String.format("%6d", i),
+ Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
+ } else {
+ console.info("{}: {}", String.format("%6d", i),
+ Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
+ }
+ break;
+ case INT32:
+ console.info("{}: {}", String.format("%6d", i),
+ dict.decodeToInt(i));
+ break;
+ case INT64:
+ console.info("{}: {}", String.format("%6d", i),
+ dict.decodeToLong(i));
+ break;
+ case FLOAT:
+ console.info("{}: {}", String.format("%6d", i),
+ dict.decodeToFloat(i));
+ break;
+ case DOUBLE:
+ console.info("{}: {}", String.format("%6d", i),
+ dict.decodeToDouble(i));
+ break;
+ default:
+ throw new IllegalArgumentException(
+ "Unknown dictionary type: " + type.getPrimitiveTypeName());
+ }
+ }
+ }
+
@Override
public List<String> getExamples() {
return Lists.newArrayList(
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ShowDictionaryCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ShowDictionaryCommandTest.java
index 5fe82c9..a4d796c 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ShowDictionaryCommandTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ShowDictionaryCommandTest.java
@@ -36,4 +36,15 @@ public class ShowDictionaryCommandTest extends ParquetFileTest {
command.setConf(new Configuration());
Assert.assertEquals(0, command.run());
}
+
+ @Test
+ public void testShowDirectoryCommandWithoutDictionaryEncoding() throws IOException {
+ File file = parquetFile();
+ ShowDictionaryCommand command = new ShowDictionaryCommand(createLogger());
+ command.targets = Arrays.asList(file.getAbsolutePath());
+ // the 'double_field' column does not have dictionary encoding
+ command.column = DOUBLE_FIELD;
+ command.setConf(new Configuration());
+ Assert.assertEquals(0, command.run());
+ }
}