You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/10/08 11:35:10 UTC

[GitHub] [doris] wsjz opened a new pull request, #13184: [feature-wip](parquet-reader) fix string test

wsjz opened a new pull request, #13184:
URL: https://github.com/apache/doris/pull/13184

   # Proposed changes
   
   Issue Number: close #xxx
   
   ## Problem summary
   
   Describe your changes.
   
   ## Checklist(Required)
   
   1. Does it affect the original behavior: 
       - [ ] Yes
       - [ ] No
       - [ ] I don't know
   2. Have unit tests been added:
       - [ ] Yes
       - [ ] No
       - [ ] No Need
   3. Has documentation been added or modified:
       - [ ] Yes
       - [ ] No
       - [ ] No Need
   4. Does it need to update dependencies:
       - [ ] Yes
       - [ ] No
   5. Are there any changes that cannot be rolled back:
       - [ ] Yes (If Yes, please explain WHY)
       - [ ] No
   
   ## Further comments
   
   If this is a relatively large or complex change, kick off the discussion at [dev@doris.apache.org](mailto:dev@doris.apache.org) by explaining why you chose the solution you did and what alternatives you considered, etc.
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [doris] morningman merged pull request #13184: [feature-wip](parquet-reader) fix string test and support decimal64

Posted by GitBox <gi...@apache.org>.
morningman merged PR #13184:
URL: https://github.com/apache/doris/pull/13184


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [doris] morningman commented on a diff in pull request #13184: [feature-wip](parquet-reader) fix string test

Posted by GitBox <gi...@apache.org>.
morningman commented on code in PR #13184:
URL: https://github.com/apache/doris/pull/13184#discussion_r992901758


##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -76,40 +56,85 @@ namespace doris::vectorized {
         return true;                                                       \
     }
 
-static bool _eval_in_val(PrimitiveType conjunct_type, std::vector<void*> in_pred_values,
-                         const char* min_bytes, const char* max_bytes) {
-    switch (conjunct_type) {
+struct ColumnMinMaxParams {
+    PrimitiveType conjunct_type;
+    tparquet::Type::type parquet_type;
+    void* value;
+    // Use for decimal type
+    int32_t parquet_precision;
+    int32_t parquet_scale;
+    int32_t parquet_type_length;
+    // Use for in predicate
+    std::vector<void*> in_pred_values;
+    const char* min_bytes;
+    const char* max_bytes;
+};
+
+template <typename T>
+static void _align_decimal_v2_scale(T* conjunct_value, int32_t value_scale, T* parquet_value,
+                                    int32_t parquet_scale) {
+    if (value_scale > parquet_scale) {
+        *parquet_value = *parquet_value * common::exp10_i32(value_scale - parquet_scale);
+    } else if (value_scale < parquet_scale) {
+        *conjunct_value = *conjunct_value * common::exp10_i32(parquet_scale - value_scale);
+    }
+}
+
+template <typename T>
+static void _decode_decimal_v2_to_primary(ColumnMinMaxParams& params, const char* raw_parquet_val,

Review Comment:
   ```suggestion
   static void _decode_decimal_v2_to_primary(const ColumnMinMaxParams& params, const char* raw_parquet_val,
   ```



##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -120,34 +145,74 @@ static bool _eval_in_val(PrimitiveType conjunct_type, std::vector<void*> in_pred
     return false;
 }
 
-static bool _eval_eq(PrimitiveType conjunct_type, void* value, const char* min_bytes,
-                     const char* max_bytes) {
-    switch (conjunct_type) {
+static bool _eval_eq(ColumnMinMaxParams& params) {

Review Comment:
   ```suggestion
   static bool _eval_eq(const ColumnMinMaxParams& params) {
   ```
   
   The same applies to all the other functions that take `ColumnMinMaxParams&`: pass it as `const ColumnMinMaxParams&`.



##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -76,40 +56,85 @@ namespace doris::vectorized {
         return true;                                                       \
     }
 
-static bool _eval_in_val(PrimitiveType conjunct_type, std::vector<void*> in_pred_values,
-                         const char* min_bytes, const char* max_bytes) {
-    switch (conjunct_type) {
+struct ColumnMinMaxParams {
+    PrimitiveType conjunct_type;
+    tparquet::Type::type parquet_type;
+    void* value;
+    // Use for decimal type
+    int32_t parquet_precision;
+    int32_t parquet_scale;
+    int32_t parquet_type_length;
+    // Use for in predicate
+    std::vector<void*> in_pred_values;
+    const char* min_bytes;
+    const char* max_bytes;
+};
+
+template <typename T>
+static void _align_decimal_v2_scale(T* conjunct_value, int32_t value_scale, T* parquet_value,
+                                    int32_t parquet_scale) {
+    if (value_scale > parquet_scale) {
+        *parquet_value = *parquet_value * common::exp10_i32(value_scale - parquet_scale);
+    } else if (value_scale < parquet_scale) {
+        *conjunct_value = *conjunct_value * common::exp10_i32(parquet_scale - value_scale);
+    }
+}
+
+template <typename T>
+static void _decode_decimal_v2_to_primary(ColumnMinMaxParams& params, const char* raw_parquet_val,
+                                          T* out_value, T* parquet_val) {
+    *parquet_val = reinterpret_cast<const T*>(raw_parquet_val)[0];
+    DecimalV2Value conjunct_value = *((DecimalV2Value*)params.value);
+    *out_value = conjunct_value.value();
+    _align_decimal_v2_scale(out_value, conjunct_value.scale(), parquet_val, params.parquet_scale);
+}
+
+static Int128 _decode_value_to_int128(ColumnMinMaxParams& params, const char* raw_parquet_val) {

Review Comment:
   ```suggestion
   static Int128 _decode_value_to_int128(const ColumnMinMaxParams& params, const char* raw_parquet_val) {
   ```



##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -120,34 +145,74 @@ static bool _eval_in_val(PrimitiveType conjunct_type, std::vector<void*> in_pred
     return false;
 }
 
-static bool _eval_eq(PrimitiveType conjunct_type, void* value, const char* min_bytes,
-                     const char* max_bytes) {
-    switch (conjunct_type) {
+static bool _eval_eq(ColumnMinMaxParams& params) {
+    switch (params.conjunct_type) {
     case TYPE_TINYINT: {
-        _PLAIN_DECODE(int16_t, value, min_bytes, max_bytes, conjunct_value, min, max)
+        _PLAIN_DECODE(int16_t, params.value, params.min_bytes, params.max_bytes, conjunct_value,
+                      min, max)
         _FILTER_GROUP_BY_EQ_PRED(conjunct_value, min, max)
         break;
     }
     case TYPE_SMALLINT: {
-        _PLAIN_DECODE(int16_t, value, min_bytes, max_bytes, conjunct_value, min, max)
+        _PLAIN_DECODE(int16_t, params.value, params.min_bytes, params.max_bytes, conjunct_value,
+                      min, max)
         _FILTER_GROUP_BY_EQ_PRED(conjunct_value, min, max)
         break;
     }
+    case TYPE_DECIMAL32:
     case TYPE_INT: {
-        _PLAIN_DECODE(int32_t, value, min_bytes, max_bytes, conjunct_value, min, max)
+        _PLAIN_DECODE(int32_t, params.value, params.min_bytes, params.max_bytes, conjunct_value,
+                      min, max)
         _FILTER_GROUP_BY_EQ_PRED(conjunct_value, min, max)
         break;
     }
+    case TYPE_DECIMAL64:
     case TYPE_BIGINT: {
-        _PLAIN_DECODE(int64_t, value, min_bytes, max_bytes, conjunct_value, min, max)
+        _PLAIN_DECODE(int64_t, params.value, params.min_bytes, params.max_bytes, conjunct_value,
+                      min, max)
         _FILTER_GROUP_BY_EQ_PRED(conjunct_value, min, max)
         break;
     }
+    case TYPE_DECIMALV2: {
+        if (params.parquet_type == tparquet::Type::INT32) {
+            int32_t min_value = reinterpret_cast<const int32_t*>(params.min_bytes)[0];
+            int32_t max_value = reinterpret_cast<const int32_t*>(params.max_bytes)[0];
+            DecimalV2Value conjunct_value = *((DecimalV2Value*)params.value);

Review Comment:
   Same as all others



##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -76,40 +56,85 @@ namespace doris::vectorized {
         return true;                                                       \
     }
 
-static bool _eval_in_val(PrimitiveType conjunct_type, std::vector<void*> in_pred_values,
-                         const char* min_bytes, const char* max_bytes) {
-    switch (conjunct_type) {
+struct ColumnMinMaxParams {
+    PrimitiveType conjunct_type;
+    tparquet::Type::type parquet_type;
+    void* value;
+    // Use for decimal type
+    int32_t parquet_precision;
+    int32_t parquet_scale;
+    int32_t parquet_type_length;
+    // Use for in predicate
+    std::vector<void*> in_pred_values;
+    const char* min_bytes;
+    const char* max_bytes;
+};
+
+template <typename T>
+static void _align_decimal_v2_scale(T* conjunct_value, int32_t value_scale, T* parquet_value,
+                                    int32_t parquet_scale) {
+    if (value_scale > parquet_scale) {
+        *parquet_value = *parquet_value * common::exp10_i32(value_scale - parquet_scale);
+    } else if (value_scale < parquet_scale) {
+        *conjunct_value = *conjunct_value * common::exp10_i32(parquet_scale - value_scale);
+    }
+}
+
+template <typename T>
+static void _decode_decimal_v2_to_primary(ColumnMinMaxParams& params, const char* raw_parquet_val,
+                                          T* out_value, T* parquet_val) {
+    *parquet_val = reinterpret_cast<const T*>(raw_parquet_val)[0];
+    DecimalV2Value conjunct_value = *((DecimalV2Value*)params.value);
+    *out_value = conjunct_value.value();
+    _align_decimal_v2_scale(out_value, conjunct_value.scale(), parquet_val, params.parquet_scale);
+}
+
+static Int128 _decode_value_to_int128(ColumnMinMaxParams& params, const char* raw_parquet_val) {
+    const uint8_t* buf = reinterpret_cast<const uint8_t*>(raw_parquet_val);
+    int32_t length = params.parquet_type_length;
+    Int128 value = buf[0] & 0x80 ? -1 : 0;
+    memcpy(reinterpret_cast<uint8_t*>(&value) + sizeof(value) - length, buf, length);
+    return BigEndian::ToHost128(value);
+}
+
+static bool _eval_in_val(ColumnMinMaxParams& params) {

Review Comment:
   ```suggestion
   static bool _eval_in_val(const ColumnMinMaxParams& params) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org