You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/07/04 09:31:36 UTC

[GitHub] [doris] morningman commented on a diff in pull request #10547: [External Table]Fix hive partiton prune in hive and hudi exteranl table.

morningman commented on code in PR #10547:
URL: https://github.com/apache/doris/pull/10547#discussion_r912795745


##########
fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java:
##########
@@ -465,68 +559,58 @@ public static ExprNodeGenericFuncDesc convertToHivePartitionExpr(Expr dorisExpr,
             case LT:
             case EQ_FOR_NULL:
                 BinaryPredicate eq = (BinaryPredicate) dorisExpr;
+                // Make sure the col slot is always first
                 SlotRef slotRef = convertDorisExprToSlotRef(eq.getChild(0));
-                LiteralExpr literalExpr = null;
-                if (slotRef == null && eq.getChild(0).isLiteral()) {
-                    literalExpr = (LiteralExpr) eq.getChild(0);
-                    slotRef = convertDorisExprToSlotRef(eq.getChild(1));
-                } else if (eq.getChild(1).isLiteral()) {
-                    literalExpr = (LiteralExpr) eq.getChild(1);
-                }
+                LiteralExpr literalExpr = (LiteralExpr) eq.getChild(1);

Review Comment:
   The `eq.getChild(1)` may NOT be a `LiteralExpr`, this may throw CastException.



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java:
##########
@@ -400,61 +401,154 @@ public static List<FieldSchema> getSchema(String dbName, String tableName, Strin
 
     /**
      * Convert Doris expr to Hive expr, only for partition column
-     * @param dorisExpr
-     * @param partitions
      * @param tblName
      * @return
      * @throws DdlException
      * @throws SemanticException
      */
-    public static ExprNodeGenericFuncDesc convertToHivePartitionExpr(Expr dorisExpr,
+    public static ExprNodeGenericFuncDesc convertToHivePartitionExpr(List<Expr> conjuncts,
+            List<String> partitionKeys, String tblName) throws DdlException {
+        List<ExprNodeDesc> hivePredicates = new ArrayList<>();
+
+        for (Expr conjunct : conjuncts) {
+            ExprNodeGenericFuncDesc hiveExpr = HiveMetaStoreClientHelper.convertToHivePartitionExpr(
+                    conjunct, partitionKeys, tblName).getFuncDesc();
+            if (hiveExpr != null) {
+                hivePredicates.add(hiveExpr);
+            }
+        }
+        int count = hivePredicates.size();
+        // combine all predicate by `and`
+        // compoundExprs must have at least 2 predicates
+        if (count >= 2) {
+            return HiveMetaStoreClientHelper.getCompoundExpr(hivePredicates, "and");
+        } else if (count == 1) {
+            // only one predicate
+            return (ExprNodeGenericFuncDesc) hivePredicates.get(0);
+        } else {
+            return genAlwaysTrueExpr(tblName);
+        }
+    }
+
+    private static ExprNodeGenericFuncDesc genAlwaysTrueExpr(String tblName) throws DdlException {
+        // have no predicate, make a dummy predicate "1=1" to get all partitions
+        HiveMetaStoreClientHelper.ExprBuilder exprBuilder =
+                new HiveMetaStoreClientHelper.ExprBuilder(tblName);
+        return exprBuilder.val(TypeInfoFactory.intTypeInfo, 1)
+                .val(TypeInfoFactory.intTypeInfo, 1)
+                .pred("=", 2).build();
+    }
+
+    private static class ExprNodeGenericFuncDescContext {
+        private final ExprNodeGenericFuncDesc funcDesc;
+        private final boolean isPartitionFiled;
+        private final boolean supportedOp;
+
+        public ExprNodeGenericFuncDescContext(ExprNodeGenericFuncDesc funcDesc) {
+            this.funcDesc = funcDesc;
+            this.isPartitionFiled = true;
+            this.supportedOp = true;
+        }
+
+        public ExprNodeGenericFuncDescContext(boolean isPartitionFiled, boolean supportedOp) {
+            this.funcDesc = null;
+            this.isPartitionFiled = true;

Review Comment:
   unused input parameters?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java:
##########
@@ -400,61 +401,154 @@ public static List<FieldSchema> getSchema(String dbName, String tableName, Strin
 
     /**
      * Convert Doris expr to Hive expr, only for partition column
-     * @param dorisExpr
-     * @param partitions
      * @param tblName
      * @return
      * @throws DdlException
      * @throws SemanticException
      */
-    public static ExprNodeGenericFuncDesc convertToHivePartitionExpr(Expr dorisExpr,
+    public static ExprNodeGenericFuncDesc convertToHivePartitionExpr(List<Expr> conjuncts,
+            List<String> partitionKeys, String tblName) throws DdlException {
+        List<ExprNodeDesc> hivePredicates = new ArrayList<>();
+
+        for (Expr conjunct : conjuncts) {
+            ExprNodeGenericFuncDesc hiveExpr = HiveMetaStoreClientHelper.convertToHivePartitionExpr(
+                    conjunct, partitionKeys, tblName).getFuncDesc();
+            if (hiveExpr != null) {
+                hivePredicates.add(hiveExpr);
+            }
+        }
+        int count = hivePredicates.size();
+        // combine all predicate by `and`
+        // compoundExprs must have at least 2 predicates
+        if (count >= 2) {
+            return HiveMetaStoreClientHelper.getCompoundExpr(hivePredicates, "and");
+        } else if (count == 1) {
+            // only one predicate
+            return (ExprNodeGenericFuncDesc) hivePredicates.get(0);
+        } else {
+            return genAlwaysTrueExpr(tblName);
+        }
+    }
+
+    private static ExprNodeGenericFuncDesc genAlwaysTrueExpr(String tblName) throws DdlException {
+        // have no predicate, make a dummy predicate "1=1" to get all partitions
+        HiveMetaStoreClientHelper.ExprBuilder exprBuilder =
+                new HiveMetaStoreClientHelper.ExprBuilder(tblName);
+        return exprBuilder.val(TypeInfoFactory.intTypeInfo, 1)
+                .val(TypeInfoFactory.intTypeInfo, 1)
+                .pred("=", 2).build();
+    }
+
+    private static class ExprNodeGenericFuncDescContext {
+        private final ExprNodeGenericFuncDesc funcDesc;
+        private final boolean isPartitionFiled;

Review Comment:
   ```suggestion
           private final boolean isPartitionField; 
   ```
   ?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java:
##########
@@ -400,61 +401,154 @@ public static List<FieldSchema> getSchema(String dbName, String tableName, Strin
 
     /**
      * Convert Doris expr to Hive expr, only for partition column
-     * @param dorisExpr
-     * @param partitions
      * @param tblName
      * @return
      * @throws DdlException
      * @throws SemanticException
      */
-    public static ExprNodeGenericFuncDesc convertToHivePartitionExpr(Expr dorisExpr,
+    public static ExprNodeGenericFuncDesc convertToHivePartitionExpr(List<Expr> conjuncts,
+            List<String> partitionKeys, String tblName) throws DdlException {
+        List<ExprNodeDesc> hivePredicates = new ArrayList<>();
+
+        for (Expr conjunct : conjuncts) {
+            ExprNodeGenericFuncDesc hiveExpr = HiveMetaStoreClientHelper.convertToHivePartitionExpr(
+                    conjunct, partitionKeys, tblName).getFuncDesc();
+            if (hiveExpr != null) {
+                hivePredicates.add(hiveExpr);
+            }
+        }
+        int count = hivePredicates.size();
+        // combine all predicate by `and`
+        // compoundExprs must have at least 2 predicates
+        if (count >= 2) {
+            return HiveMetaStoreClientHelper.getCompoundExpr(hivePredicates, "and");
+        } else if (count == 1) {
+            // only one predicate
+            return (ExprNodeGenericFuncDesc) hivePredicates.get(0);
+        } else {
+            return genAlwaysTrueExpr(tblName);
+        }
+    }
+
+    private static ExprNodeGenericFuncDesc genAlwaysTrueExpr(String tblName) throws DdlException {
+        // have no predicate, make a dummy predicate "1=1" to get all partitions
+        HiveMetaStoreClientHelper.ExprBuilder exprBuilder =
+                new HiveMetaStoreClientHelper.ExprBuilder(tblName);
+        return exprBuilder.val(TypeInfoFactory.intTypeInfo, 1)
+                .val(TypeInfoFactory.intTypeInfo, 1)
+                .pred("=", 2).build();
+    }
+
+    private static class ExprNodeGenericFuncDescContext {
+        private final ExprNodeGenericFuncDesc funcDesc;
+        private final boolean isPartitionFiled;
+        private final boolean supportedOp;
+
+        public ExprNodeGenericFuncDescContext(ExprNodeGenericFuncDesc funcDesc) {
+            this.funcDesc = funcDesc;
+            this.isPartitionFiled = true;
+            this.supportedOp = true;
+        }
+
+        public ExprNodeGenericFuncDescContext(boolean isPartitionFiled, boolean supportedOp) {
+            this.funcDesc = null;
+            this.isPartitionFiled = true;
+            this.supportedOp = true;
+        }
+
+        /**
+         * If it's a partition key and also is a supportedOp, we can go through into next loop.
+         */
+        public boolean keepForAndOp() {
+            return isPartitionFiled  && supportedOp;
+        }
+
+        /**
+         * If it's not a partition key, we should ignore this expr in `or`.
+         */
+        public boolean isPartitionFiled() {
+            return isPartitionFiled;
+        }
+
+        /**
+         * And if is a partition key and also is a not supportedOp, this is an always true expr.
+         */
+        public boolean alwaysTrueForOrOp() {
+            return isPartitionFiled && !supportedOp;
+        }
+
+        public ExprNodeGenericFuncDesc getFuncDesc() {
+            return funcDesc;
+        }
+    }
+
+    private static ExprNodeGenericFuncDescContext convertToHivePartitionExpr(Expr dorisExpr,
             List<String> partitions, String tblName) throws DdlException {
         if (dorisExpr == null) {
-            return null;
+            return new ExprNodeGenericFuncDescContext(false, false);
         }
 
         if (dorisExpr instanceof CompoundPredicate) {
             CompoundPredicate compoundPredicate = (CompoundPredicate) dorisExpr;
             switch (compoundPredicate.getOp()) {
                 case AND: {
-                    ExprNodeGenericFuncDesc left = convertToHivePartitionExpr(
+                    ExprNodeGenericFuncDescContext left = convertToHivePartitionExpr(
                             compoundPredicate.getChild(0), partitions, tblName);
-                    ExprNodeGenericFuncDesc right = convertToHivePartitionExpr(
-                            compoundPredicate.getChild(0), partitions, tblName);
-                    if (left != null && right != null) {
+                    ExprNodeGenericFuncDescContext right = convertToHivePartitionExpr(
+                            compoundPredicate.getChild(1), partitions, tblName);
+
+                    if (left.keepForAndOp() && right.keepForAndOp()) {
                         List<ExprNodeDesc> andArgs = new ArrayList<>();
-                        andArgs.add(left);
-                        andArgs.add(right);
-                        return getCompoundExpr(andArgs, "and");
-                    } else if (left != null && right == null) {
+                        andArgs.add(left.getFuncDesc());
+                        andArgs.add(right.getFuncDesc());
+                        return new ExprNodeGenericFuncDescContext(getCompoundExpr(andArgs, "and"));
+                    } else if (left.keepForAndOp()) {
                         return left;
-                    } else if (left == null && right != null) {
+                    } else if (right.keepForAndOp()) {
                         return right;
+                    } else {
+                        return new ExprNodeGenericFuncDescContext(false, false);
                     }
-                    return null;
                 }
                 case OR: {
-                    ExprNodeGenericFuncDesc left = convertToHivePartitionExpr(
-                            compoundPredicate.getChild(0), partitions, tblName);
-                    ExprNodeGenericFuncDesc right = convertToHivePartitionExpr(
+                    ExprNodeGenericFuncDescContext left = convertToHivePartitionExpr(
                             compoundPredicate.getChild(0), partitions, tblName);
-                    if (left != null && right != null) {
-                        List<ExprNodeDesc> orArgs = new ArrayList<>();
-                        orArgs.add(left);
-                        orArgs.add(right);
-                        return getCompoundExpr(orArgs, "or");
-                    } else if (left != null && right == null) {
-                        return left;
-                    } else if (left == null && right != null) {
-                        return right;
+                    ExprNodeGenericFuncDescContext right = convertToHivePartitionExpr(
+                            compoundPredicate.getChild(1), partitions, tblName);
+                    // no matter left or right can drop, we must drop both for this example:
+                    // partition_key1 = 1 or partition_key2 in (2,3) which `in` is a nonsupport op now.
+                    if (left.alwaysTrueForOrOp() || right.alwaysTrueForOrOp()) {
+                        // return 1 = 1 expr for these examples:
+                        // (partition_key1 = 1 or partition_key2 in (2,3)) or partition_key3 = 4 => always true
+                        // (partition_key1 = 1 or partition_key2 in (2,3)) and partition_key3 = 4 => partition_key3 = 4
+                        return new ExprNodeGenericFuncDescContext(genAlwaysTrueExpr(tblName));
+                    } else {
+                        if (left.isPartitionFiled() && right.isPartitionFiled()) {
+                            List<ExprNodeDesc> orArgs = new ArrayList<>();
+                            orArgs.add(left.getFuncDesc());
+                            orArgs.add(right.getFuncDesc());
+                            return  new ExprNodeGenericFuncDescContext(getCompoundExpr(orArgs, "or"));
+                        } else if (left.isPartitionFiled()) {

Review Comment:
   I think we should return `alwaysTrue` is only one side is partition field?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org