You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/31 03:59:03 UTC
[doris] branch master updated: [fix](multi catalog)Collect decimal and date type min max statistic value (#16262)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a8a29427f6 [fix](multi catalog)Collect decimal and date type min max statistic value (#16262)
a8a29427f6 is described below
commit a8a29427f66dd900eb33184e11d9aa56be620560
Author: Jibing-Li <64...@users.noreply.github.com>
AuthorDate: Tue Jan 31 11:58:56 2023 +0800
[fix](multi catalog)Collect decimal and date type min max statistic value (#16262)
The min and max values of decimal and date columns in Hive external tables are incorrect;
this PR parses the min and max values from HMS correctly.
---
.../doris/catalog/external/HMSExternalTable.java | 1 +
.../planner/external/ExternalFileScanNode.java | 1 +
.../apache/doris/statistics/HiveAnalysisTask.java | 45 +++++++++++++++++-----
3 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index a0b9535f89..638c1642ec 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -255,6 +255,7 @@ public class HMSExternalTable extends ExternalTable {
* get the dla type for scan node to get right information.
*/
public DLAType getDlaType() {
+ makeSureInitialized();
return dlaType;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java
index 37d7fd58f3..de007632c9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java
@@ -597,6 +597,7 @@ public class ExternalFileScanNode extends ExternalScanNode {
@Override
public String getNodeExplainString(String prefix, TExplainLevel detailLevel) {
StringBuilder output = new StringBuilder();
+ output.append(prefix).append("table: ").append(desc.getTable().getName()).append("\n");
if (!conjuncts.isEmpty()) {
output.append(prefix).append("predicates: ").append(getExplainString(conjuncts)).append("\n");
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
index 836e3c6ae7..d22e2abe78 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
@@ -28,6 +28,7 @@ import org.apache.commons.text.StringSubstitutor;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Decimal;
import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
@@ -36,7 +37,10 @@ import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.math.BigDecimal;
+import java.math.BigInteger;
import java.text.SimpleDateFormat;
+import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@@ -139,8 +143,8 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
private void getStatData(ColumnStatisticsData data, Map<String, String> params) {
long ndv = 0;
long nulls = 0;
- String min;
- String max;
+ String min = "";
+ String max = "";
// Collect ndv, nulls, min and max for different data type.
if (data.isSetLongStats()) {
LongColumnStatsData longStats = data.getLongStats();
@@ -152,15 +156,25 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
StringColumnStatsData stringStats = data.getStringStats();
ndv = stringStats.getNumDVs();
nulls = stringStats.getNumNulls();
- min = "No value";
- max = String.valueOf(stringStats.getMaxColLen());
} else if (data.isSetDecimalStats()) {
- // TODO: Need a more accurate way to collect decimal values.
DecimalColumnStatsData decimalStats = data.getDecimalStats();
ndv = decimalStats.getNumDVs();
nulls = decimalStats.getNumNulls();
- min = decimalStats.getLowValue().toString();
- max = decimalStats.getHighValue().toString();
+ if (decimalStats.isSetLowValue()) {
+ Decimal lowValue = decimalStats.getLowValue();
+ if (lowValue != null) {
+ BigDecimal lowDecimal = new BigDecimal(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
+ min = lowDecimal.toString();
+ }
+ }
+ if (decimalStats.isSetHighValue()) {
+ Decimal highValue = decimalStats.getHighValue();
+ if (highValue != null) {
+ BigDecimal highDecimal = new BigDecimal(
+ new BigInteger(highValue.getUnscaled()), highValue.getScale());
+ max = highDecimal.toString();
+ }
+ }
} else if (data.isSetDoubleStats()) {
DoubleColumnStatsData doubleStats = data.getDoubleStats();
ndv = doubleStats.getNumDVs();
@@ -168,12 +182,23 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
min = String.valueOf(doubleStats.getLowValue());
max = String.valueOf(doubleStats.getHighValue());
} else if (data.isSetDateStats()) {
- // TODO: Need a more accurate way to collect date values.
DateColumnStatsData dateStats = data.getDateStats();
ndv = dateStats.getNumDVs();
nulls = dateStats.getNumNulls();
- min = dateStats.getLowValue().toString();
- max = dateStats.getHighValue().toString();
+ if (dateStats.isSetLowValue()) {
+ org.apache.hadoop.hive.metastore.api.Date lowValue = dateStats.getLowValue();
+ if (lowValue != null) {
+ LocalDate lowDate = LocalDate.ofEpochDay(lowValue.getDaysSinceEpoch());
+ min = lowDate.toString();
+ }
+ }
+ if (dateStats.isSetHighValue()) {
+ org.apache.hadoop.hive.metastore.api.Date highValue = dateStats.getHighValue();
+ if (highValue != null) {
+ LocalDate highDate = LocalDate.ofEpochDay(highValue.getDaysSinceEpoch());
+ max = highDate.toString();
+ }
+ }
} else {
throw new RuntimeException("Not supported data type.");
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org