You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by ab...@apache.org on 2023/03/16 09:57:52 UTC

[druid] branch master updated: Fixes parquet uint_32 datatype conversion (#13935)

This is an automated email from the ASF dual-hosted git repository.

abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 6837289cb0 Fixes parquet uint_32 datatype conversion (#13935)
6837289cb0 is described below

commit 6837289cb043fdf57375ad7911fb68a2d2a84276
Author: Tejaswini Bandlamudi <96...@users.noreply.github.com>
AuthorDate: Thu Mar 16 15:27:38 2023 +0530

    Fixes parquet uint_32 datatype conversion (#13935)
    
    After Parquet ingestion, columns of the Parquet uint_32 type were stored as null values in the dataSource. This PR fixes the conversion bug by reading such values as 32-bit integers and widening them to unsigned longs.
---
 .../example/datatypes/uint32_test.parquet            | Bin 0 -> 2284 bytes
 .../input/parquet/simple/ParquetGroupConverter.java  |   1 +
 .../data/input/parquet/WikiParquetReaderTest.java    |  19 +++++++++++++++++++
 3 files changed, 20 insertions(+)

diff --git a/extensions-core/parquet-extensions/example/datatypes/uint32_test.parquet b/extensions-core/parquet-extensions/example/datatypes/uint32_test.parquet
new file mode 100644
index 0000000000..292fc6ba9e
Binary files /dev/null and b/extensions-core/parquet-extensions/example/datatypes/uint32_test.parquet differ
diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java
index 88902cede1..13e21fa2a0 100644
--- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java
+++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java
@@ -364,6 +364,7 @@ class ParquetGroupConverter
           case UINT_16:
             return g.getInteger(fieldIndex, index);
           case UINT_32:
+            return Integer.toUnsignedLong(g.getInteger(fieldIndex, index));
           case UINT_64:
             return g.getLong(fieldIndex, index);
           case DECIMAL:
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
index 4bc7bac27b..82ac2acd27 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
@@ -79,4 +79,23 @@ public class WikiParquetReaderTest extends BaseParquetReaderTest
                                 + "}";
     Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
   }
+
+  @Test
+  public void testUint32Datatype() throws IOException
+  {
+    InputRowSchema schema = new InputRowSchema(
+        new TimestampSpec("time", "millis", null),
+        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo", "bar"))),
+        ColumnsFilter.all()
+    );
+    InputEntityReader reader = createReader("example/datatypes/uint32_test.parquet", schema, JSONPathSpec.DEFAULT);
+    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
+
+    final String expectedJson = "{\n"
+                                + "  \"bar\" : 2147483649,\n"
+                                + "  \"foo\" : \"baz\",\n"
+                                + "  \"time\" : 1678853101621\n"
+                                + "}";
+    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org