You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/04/01 13:00:11 UTC

[doris] branch master updated: [fix](parquet-reader) reset value idx in bool rle decoder and support iceberg datetime(3) (#18245)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 97aab138aa [fix](parquet-reader) reset value idx in bool rle decoder and support iceberg datetime(3) (#18245)
97aab138aa is described below

commit 97aab138aa61402e9b9fa5f587ee8554e3be561c
Author: slothever <18...@users.noreply.github.com>
AuthorDate: Sat Apr 1 21:00:01 2023 +0800

    [fix](parquet-reader) reset value idx in bool rle decoder and support iceberg datetime(3) (#18245)
    
    1. Fix the value idx in the bool RLE decoder: reset it when new data is set and after each decode.
    2. Iceberg tables now support datetimev2(3). Previously, the hive timestamp type was converted to datetimev2(0) by default.
---
 .../vec/exec/format/parquet/bool_rle_decoder.cpp   |  6 +--
 .../doris/catalog/HiveMetaStoreClientHelper.java   |  9 +++-
 .../doris/catalog/external/HMSExternalTable.java   |  3 +-
 .../catalog/external/IcebergExternalTable.java     |  4 +-
 .../hive/test_external_catalog_glue_table.out      | 59 ++++++++++++----------
 .../hive/test_external_catalog_glue_table.groovy   |  1 +
 6 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
index c954f98b25..46d403e6f7 100644
--- a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
@@ -24,7 +24,7 @@ void BoolRLEDecoder::set_data(Slice* slice) {
     _data = slice;
     _num_bytes = slice->size;
     _offset = 0;
-
+    _current_value_idx = 0;
     if (_num_bytes < 4) {
         LOG(FATAL) << "Received invalid length : " + std::to_string(_num_bytes) +
                               " (corrupt data page?)";
@@ -51,12 +51,11 @@ Status BoolRLEDecoder::decode_values(MutableColumnPtr& doris_column, DataTypePtr
     auto& column_data = static_cast<ColumnVector<UInt8>&>(*doris_column).get_data();
     size_t data_index = column_data.size();
     column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
-    size_t max_values = column_data.size();
+    size_t max_values = select_vector.num_values() - select_vector.num_nulls();
     _values.resize(max_values);
     if (!_decoder.get_values(_values.data(), max_values)) {
         return Status::IOError("Can't read enough booleans in rle decoder");
     }
-    // _num_bytes -= max_values;
     ColumnSelectVector::DataReadType read_type;
     while (size_t run_length = select_vector.get_next_run(&read_type)) {
         switch (read_type) {
@@ -83,6 +82,7 @@ Status BoolRLEDecoder::decode_values(MutableColumnPtr& doris_column, DataTypePtr
         }
         }
     }
+    _current_value_idx = 0;
     return Status::OK();
 }
 } // namespace doris::vectorized
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java
index e4f2ad993b..038f108439 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java
@@ -698,6 +698,13 @@ public class HiveMetaStoreClientHelper {
      * Convert hive type to doris type.
      */
     public static Type hiveTypeToDorisType(String hiveType) {
+        return hiveTypeToDorisType(hiveType, 0);
+    }
+
+    /**
+     * Convert hive type to doris type with timescale.
+     */
+    public static Type hiveTypeToDorisType(String hiveType, int timeScale) {
         String lowerCaseType = hiveType.toLowerCase();
         switch (lowerCaseType) {
             case "boolean":
@@ -713,7 +720,7 @@ public class HiveMetaStoreClientHelper {
             case "date":
                 return ScalarType.createDateV2Type();
             case "timestamp":
-                return ScalarType.createDatetimeV2Type(0);
+                return ScalarType.createDatetimeV2Type(timeScale);
             case "float":
                 return Type.FLOAT;
             case "double":
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index 1a558e1bd9..7398d87867 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -318,7 +318,8 @@ public class HMSExternalTable extends ExternalTable {
         List<Column> tmpSchema = Lists.newArrayListWithCapacity(hmsSchema.size());
         for (FieldSchema field : hmsSchema) {
             tmpSchema.add(new Column(field.getName(),
-                    HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType()), true, null,
+                    HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType(),
+                    IcebergExternalTable.ICEBERG_DATETIME_SCALE_MS), true, null,
                     true, null, field.getComment(), true, null,
                     schema.caseInsensitiveFindField(field.getName()).fieldId(), null));
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/IcebergExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/IcebergExternalTable.java
index 521a813182..a8998c6365 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/IcebergExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/IcebergExternalTable.java
@@ -36,6 +36,8 @@ import java.util.List;
 
 public class IcebergExternalTable extends ExternalTable {
 
+    public static final int ICEBERG_DATETIME_SCALE_MS = 3;
+
     public IcebergExternalTable(long id, String name, String dbName, IcebergExternalCatalog catalog) {
         super(id, name, catalog, dbName, TableType.ICEBERG_EXTERNAL_TABLE);
     }
@@ -88,7 +90,7 @@ public class IcebergExternalTable extends ExternalTable {
             case DATE:
                 return ScalarType.createDateV2Type();
             case TIMESTAMP:
-                return ScalarType.createDatetimeV2Type(0);
+                return ScalarType.createDatetimeV2Type(ICEBERG_DATETIME_SCALE_MS);
             case TIME:
                 return Type.UNSUPPORTED;
             default:
diff --git a/regression-test/data/external_table_emr_p2/hive/test_external_catalog_glue_table.out b/regression-test/data/external_table_emr_p2/hive/test_external_catalog_glue_table.out
index a2860e3bdd..9fbbbabaf0 100644
--- a/regression-test/data/external_table_emr_p2/hive/test_external_catalog_glue_table.out
+++ b/regression-test/data/external_table_emr_p2/hive/test_external_catalog_glue_table.out
@@ -35,26 +35,26 @@
 1876.4831949153224
 
 -- !q06 --
-2023-03-07 20:34:59
-2023-03-07 20:34:59
-2023-03-07 20:34:59
-2023-03-07 20:34:59
-2023-03-07 20:34:59
-2023-03-07 20:34:59
-2023-03-07 20:34:59
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
-2023-03-07 20:35
+2023-03-07 20:34:59.601
+2023-03-07 20:34:59.693
+2023-03-07 20:34:59.708
+2023-03-07 20:34:59.782
+2023-03-07 20:34:59.836
+2023-03-07 20:34:59.934
+2023-03-07 20:34:59.950
+2023-03-07 20:35:00.042
+2023-03-07 20:35:00.053
+2023-03-07 20:35:00.114
+2023-03-07 20:35:00.134
+2023-03-07 20:35:00.201
+2023-03-07 20:35:00.272
+2023-03-07 20:35:00.316
+2023-03-07 20:35:00.337
+2023-03-07 20:35:00.409
+2023-03-07 20:35:00.420
+2023-03-07 20:35:00.428
+2023-03-07 20:35:00.500
+2023-03-07 20:35:00.535
 
 -- !q07 --
 6f77a7baae184d                                    
@@ -82,7 +82,7 @@ f14889
 66.8626	true
 66.9046	true
 67.0202	true
-67.7351	false
+67.7351	true
 
 -- !q11 --
 54078	8184
@@ -124,8 +124,15 @@ b5e6bf2b5
 5000
 
 -- !q16 --
-2023-03-07 20:35:59
-2023-03-07 20:35:59
-2023-03-07 20:35:59
-2023-03-07 20:35:59
-2023-03-07 20:35:59
+2023-03-07 20:35:59.064
+2023-03-07 20:35:59.087
+2023-03-07 20:35:59.110
+2023-03-07 20:35:59.129
+2023-03-07 20:35:59.224
+
+-- !q17 --
+14040216	\N	2147483647	2023-03-07 20:38:02.140	81.607142423775869	b1d54a8ac60a4c8aa	66.6566	a54742979109                                      	9a8247ed7c74	false
+7847742	17740	2147483647	2023-03-07 20:36:02.376	1740.7904511543441	ff588a918be	66.8626	41c532d698024                                     	18d9fa638cd449d893	true
+9045125	27361	2147483647	2023-03-07 20:35:51.997	1245.2170379359104	b31a143e67	66.9046	52ab9d8a748f4c9                                   	5d70ec319e	true
+10410585	\N	1938534851	2023-03-07 20:35:17.731	955.1760424982325	643e7c71b83d444e9261	67.0202	6a15d14103dc4                                     	55b15adbec34	true
+10055090	\N	2147483647	2023-03-07 20:38:59.078	1387.1527042831178	47	67.7351	c4c5                                              	960637955914682b6	true
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_glue_table.groovy b/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_glue_table.groovy
index 85056ad772..26b1291ae2 100644
--- a/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_glue_table.groovy
+++ b/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_glue_table.groovy
@@ -47,6 +47,7 @@ suite("test_external_catalog_glue_table", "p2") {
             qt_q14 """ select glue_string from iceberg_glue_types where glue_string>'040abff1da4748e4b' order by glue_int limit 5 """
             qt_q15 """ select count(1) from iceberg_glue_types """
             qt_q16 """ select glue_timstamp from iceberg_glue_types where glue_timstamp > '2023-03-07 20:35:59' order by glue_timstamp limit 5 """
+            qt_q17 """ select * from iceberg_glue_types order by glue_decimal limit 5 """
         }
         sql """ use `iceberg_catalog`; """
         q01()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org