You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by ab...@apache.org on 2023/03/16 09:57:52 UTC
[druid] branch master updated: Fixes parquet uint_32 datatype conversion (#13935)
This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 6837289cb0 Fixes parquet uint_32 datatype conversion (#13935)
6837289cb0 is described below
commit 6837289cb043fdf57375ad7911fb68a2d2a84276
Author: Tejaswini Bandlamudi <96...@users.noreply.github.com>
AuthorDate: Thu Mar 16 15:27:38 2023 +0530
Fixes parquet uint_32 datatype conversion (#13935)
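After Parquet ingestion, columns whose logical type is UINT_32 were stored as null values in the datasource. This PR fixes the conversion bug: UINT_32 values are now read as 32-bit integers and widened to an unsigned long (for example, 2147483649 is preserved instead of becoming null).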
---
.../example/datatypes/uint32_test.parquet | Bin 0 -> 2284 bytes
.../input/parquet/simple/ParquetGroupConverter.java | 1 +
.../data/input/parquet/WikiParquetReaderTest.java | 19 +++++++++++++++++++
3 files changed, 20 insertions(+)
diff --git a/extensions-core/parquet-extensions/example/datatypes/uint32_test.parquet b/extensions-core/parquet-extensions/example/datatypes/uint32_test.parquet
new file mode 100644
index 0000000000..292fc6ba9e
Binary files /dev/null and b/extensions-core/parquet-extensions/example/datatypes/uint32_test.parquet differ
diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java
index 88902cede1..13e21fa2a0 100644
--- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java
+++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java
@@ -364,6 +364,7 @@ class ParquetGroupConverter
case UINT_16:
return g.getInteger(fieldIndex, index);
case UINT_32:
+ return Integer.toUnsignedLong(g.getInteger(fieldIndex, index));
case UINT_64:
return g.getLong(fieldIndex, index);
case DECIMAL:
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
index 4bc7bac27b..82ac2acd27 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/WikiParquetReaderTest.java
@@ -79,4 +79,23 @@ public class WikiParquetReaderTest extends BaseParquetReaderTest
+ "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
+
+ @Test
+ public void testUint32Datatype() throws IOException
+ {
+ InputRowSchema schema = new InputRowSchema(
+ new TimestampSpec("time", "millis", null),
+ new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo", "bar"))),
+ ColumnsFilter.all()
+ );
+ InputEntityReader reader = createReader("example/datatypes/uint32_test.parquet", schema, JSONPathSpec.DEFAULT);
+ List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
+
+ final String expectedJson = "{\n"
+ + " \"bar\" : 2147483649,\n"
+ + " \"foo\" : \"baz\",\n"
+ + " \"time\" : 1678853101621\n"
+ + "}";
+ Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org