You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/31 03:59:03 UTC

[doris] branch master updated: [fix](multi catalog)Collect decimal and date type min max statistic value (#16262)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a8a29427f6 [fix](multi catalog)Collect decimal and date type min max statistic value (#16262)
a8a29427f6 is described below

commit a8a29427f66dd900eb33184e11d9aa56be620560
Author: Jibing-Li <64...@users.noreply.github.com>
AuthorDate: Tue Jan 31 11:58:56 2023 +0800

    [fix](multi catalog)Collect decimal and date type min max statistic value (#16262)
    
    The min and max values of decimal and date columns in Hive external tables are incorrect;
    this PR parses the min/max values from HMS correctly.
---
 .../doris/catalog/external/HMSExternalTable.java   |  1 +
 .../planner/external/ExternalFileScanNode.java     |  1 +
 .../apache/doris/statistics/HiveAnalysisTask.java  | 45 +++++++++++++++++-----
 3 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index a0b9535f89..638c1642ec 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -255,6 +255,7 @@ public class HMSExternalTable extends ExternalTable {
      * get the dla type for scan node to get right information.
      */
     public DLAType getDlaType() {
+        makeSureInitialized();
         return dlaType;
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java
index 37d7fd58f3..de007632c9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java
@@ -597,6 +597,7 @@ public class ExternalFileScanNode extends ExternalScanNode {
     @Override
     public String getNodeExplainString(String prefix, TExplainLevel detailLevel) {
         StringBuilder output = new StringBuilder();
+        output.append(prefix).append("table: ").append(desc.getTable().getName()).append("\n");
         if (!conjuncts.isEmpty()) {
             output.append(prefix).append("predicates: ").append(getExplainString(conjuncts)).append("\n");
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
index 836e3c6ae7..d22e2abe78 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
@@ -28,6 +28,7 @@ import org.apache.commons.text.StringSubstitutor;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Decimal;
 import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
@@ -36,7 +37,10 @@ import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.math.BigDecimal;
+import java.math.BigInteger;
 import java.text.SimpleDateFormat;
+import java.time.LocalDate;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
@@ -139,8 +143,8 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
     private void getStatData(ColumnStatisticsData data, Map<String, String> params) {
         long ndv = 0;
         long nulls = 0;
-        String min;
-        String max;
+        String min = "";
+        String max = "";
         // Collect ndv, nulls, min and max for different data type.
         if (data.isSetLongStats()) {
             LongColumnStatsData longStats = data.getLongStats();
@@ -152,15 +156,25 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
             StringColumnStatsData stringStats = data.getStringStats();
             ndv = stringStats.getNumDVs();
             nulls = stringStats.getNumNulls();
-            min = "No value";
-            max = String.valueOf(stringStats.getMaxColLen());
         } else if (data.isSetDecimalStats()) {
-            // TODO: Need a more accurate way to collect decimal values.
             DecimalColumnStatsData decimalStats = data.getDecimalStats();
             ndv = decimalStats.getNumDVs();
             nulls = decimalStats.getNumNulls();
-            min = decimalStats.getLowValue().toString();
-            max = decimalStats.getHighValue().toString();
+            if (decimalStats.isSetLowValue()) {
+                Decimal lowValue = decimalStats.getLowValue();
+                if (lowValue != null) {
+                    BigDecimal lowDecimal = new BigDecimal(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
+                    min = lowDecimal.toString();
+                }
+            }
+            if (decimalStats.isSetHighValue()) {
+                Decimal highValue = decimalStats.getHighValue();
+                if (highValue != null) {
+                    BigDecimal highDecimal = new BigDecimal(
+                            new BigInteger(highValue.getUnscaled()), highValue.getScale());
+                    max = highDecimal.toString();
+                }
+            }
         } else if (data.isSetDoubleStats()) {
             DoubleColumnStatsData doubleStats = data.getDoubleStats();
             ndv = doubleStats.getNumDVs();
@@ -168,12 +182,23 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
             min = String.valueOf(doubleStats.getLowValue());
             max = String.valueOf(doubleStats.getHighValue());
         } else if (data.isSetDateStats()) {
-            // TODO: Need a more accurate way to collect date values.
             DateColumnStatsData dateStats = data.getDateStats();
             ndv = dateStats.getNumDVs();
             nulls = dateStats.getNumNulls();
-            min = dateStats.getLowValue().toString();
-            max = dateStats.getHighValue().toString();
+            if (dateStats.isSetLowValue()) {
+                org.apache.hadoop.hive.metastore.api.Date lowValue = dateStats.getLowValue();
+                if (lowValue != null) {
+                    LocalDate lowDate = LocalDate.ofEpochDay(lowValue.getDaysSinceEpoch());
+                    min = lowDate.toString();
+                }
+            }
+            if (dateStats.isSetHighValue()) {
+                org.apache.hadoop.hive.metastore.api.Date highValue = dateStats.getHighValue();
+                if (highValue != null) {
+                    LocalDate highDate = LocalDate.ofEpochDay(highValue.getDaysSinceEpoch());
+                    max = highDate.toString();
+                }
+            }
         } else {
             throw new RuntimeException("Not supported data type.");
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org