You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/07/18 02:29:40 UTC

[GitHub] [doris] qidaye commented on a diff in pull request #9895: [Enhancement](DOE) Step1: Fe generates the DSL and is used to explain

qidaye commented on code in PR #9895:
URL: https://github.com/apache/doris/pull/9895#discussion_r922936585


##########
fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java:
##########
@@ -210,4 +231,216 @@ private static void resolveDocValuesFields(SearchContext searchContext, JSONObje
             searchContext.docValueFieldsContext().put(colName, docValueField);
         }
     }
+
+    /**
+     * Translates a compound predicate (AND / OR / NOT) into a bool query.
+     * The given expr is cast to CompoundPredicate without a type check.
+     * Returns null as soon as any operand cannot be translated, or for an unknown operator.
+     **/
+    private static QueryBuilder toCompoundEsDsl(Expr expr) {
+        CompoundPredicate predicate = (CompoundPredicate) expr;
+        switch (predicate.getOp()) {
+            case AND: {
+                // Both operands are translated up front; each becomes a `must` clause.
+                QueryBuilder lhs = toEsDsl(predicate.getChild(0));
+                QueryBuilder rhs = toEsDsl(predicate.getChild(1));
+                if (lhs == null || rhs == null) {
+                    return null;
+                }
+                return QueryBuilders.boolQuery().must(lhs).must(rhs);
+            }
+            case OR: {
+                // Both operands are translated up front; each becomes a `should` clause.
+                QueryBuilder lhs = toEsDsl(predicate.getChild(0));
+                QueryBuilder rhs = toEsDsl(predicate.getChild(1));
+                if (lhs == null || rhs == null) {
+                    return null;
+                }
+                return QueryBuilders.boolQuery().should(lhs).should(rhs);
+            }
+            case NOT: {
+                // A single operand, wrapped as a `must_not` clause.
+                QueryBuilder operand = toEsDsl(predicate.getChild(0));
+                if (operand == null) {
+                    return null;
+                }
+                return QueryBuilders.boolQuery().mustNot(operand);
+            }
+            default:
+                return null;
+        }
+    }
+
+    /**
+     * Doris expr to es dsl.
+     *
+     * <p>Handles compound predicates (AND / OR / NOT), binary comparisons, IS [NOT] NULL, LIKE,
+     * [NOT] IN and the {@code esquery} function. A column predicate is only translated when its
+     * first child is a plain {@link SlotRef} and its operands are supported literals.
+     *
+     * @param expr the expression to translate, may be {@code null}
+     * @return the query builder, or {@code null} when the expression cannot be translated
+     **/
+    public static QueryBuilder toEsDsl(Expr expr) {
+        if (expr == null) {
+            return null;
+        }
+        // CompoundPredicate, `between` also converted to CompoundPredicate.
+        if (expr instanceof CompoundPredicate) {
+            return toCompoundEsDsl(expr);
+        }
+        if (expr instanceof BinaryPredicate || expr instanceof IsNullPredicate
+                || expr instanceof LikePredicate || expr instanceof InPredicate) {
+            // Only `column <op> ...` maps onto an ES field. Any other left-hand side is reported
+            // as not translatable instead of failing on the SlotRef cast.
+            Expr target = expr.getChild(0);
+            if (!(target instanceof SlotRef)) {
+                return null;
+            }
+            return toColumnPredicateEsDsl(expr, ((SlotRef) target).getColumnName());
+        }
+        if (expr instanceof FunctionCallExpr) {
+            FunctionCallExpr functionCallExpr = (FunctionCallExpr) expr;
+            if ("esquery".equals(functionCallExpr.getFnName().getFunction())) {
+                // The second argument is handed to the builder as-is; the column is not used.
+                String stringValue = functionCallExpr.getChild(1).getStringValue();
+                return new QueryBuilders.EsQueryBuilder(stringValue);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Translates a predicate whose first child is the given column.
+     * Returns null when the operator or one of the operands is not supported.
+     **/
+    private static QueryBuilder toColumnPredicateEsDsl(Expr expr, String column) {
+        if (expr instanceof BinaryPredicate) {
+            Object value = toDorisLiteral(expr.getChild(1));
+            if (value == null) {
+                // Right-hand side is not a supported literal, so no term / range query can be built.
+                return null;
+            }
+            TExprOpcode opCode = expr.getOpcode();
+            switch (opCode) {
+                case EQ:
+                case EQ_FOR_NULL:
+                    return QueryBuilders.termQuery(column, value);
+                case NE:
+                    return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(column, value));
+                case GE:
+                    return QueryBuilders.rangeQuery(column).gte(value);
+                case GT:
+                    return QueryBuilders.rangeQuery(column).gt(value);
+                case LE:
+                    return QueryBuilders.rangeQuery(column).lte(value);
+                case LT:
+                    return QueryBuilders.rangeQuery(column).lt(value);
+                default:
+                    return null;
+            }
+        }
+        if (expr instanceof IsNullPredicate) {
+            IsNullPredicate isNullPredicate = (IsNullPredicate) expr;
+            if (isNullPredicate.isNotNull()) {
+                return QueryBuilders.existsQuery(column);
+            }
+            return QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(column));
+        }
+        if (expr instanceof LikePredicate) {
+            LikePredicate likePredicate = (LikePredicate) expr;
+            String pattern = likePredicate.getChild(1).getStringValue();
+            if (likePredicate.getOp().equals(Operator.LIKE)) {
+                return QueryBuilders.wildcardQuery(column, likeToWildcard(pattern));
+            }
+            // NOTE(review): for any operator other than LIKE the pattern is passed unchanged to a
+            // wildcard query - confirm this is the intended translation.
+            return QueryBuilders.wildcardQuery(column, pattern);
+        }
+        if (expr instanceof InPredicate) {
+            InPredicate inPredicate = (InPredicate) expr;
+            List<Object> values = inPredicate.getListChildren().stream().map(EsUtil::toDorisLiteral)
+                    .collect(Collectors.toList());
+            if (values.contains(null)) {
+                // At least one element is not a supported literal.
+                return null;
+            }
+            if (inPredicate.isNotIn()) {
+                return QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery(column, values));
+            }
+            return QueryBuilders.termsQuery(column, values);
+        }
+        return null;
+    }
+
+    /**
+     * Converts a SQL LIKE pattern into a wildcard pattern: an unescaped `_` becomes `?`
+     * and an unescaped `%` becomes `*`.
+     **/
+    private static String likeToWildcard(String likePattern) {
+        char[] chars = likePattern.toCharArray();
+        // example of translation :
+        //      abc_123  ===> abc?123
+        //      abc%ykz  ===> abc*ykz
+        //      %abc123  ===> *abc123
+        //      _abc123  ===> ?abc123
+        //      \\_abc1  ===> \\_abc1
+        //      abc\\_123 ===> abc\\_123
+        //      abc\\%123 ===> abc\\%123
+        // NOTE. user must input sql like 'abc\\_123' or 'abc\\%ykz'
+        for (int i = 0; i < chars.length; i++) {
+            boolean likeWildcard = chars[i] == '_' || chars[i] == '%';
+            // A wildcard directly preceded by a backslash is escaped and kept as written.
+            boolean escaped = i > 0 && chars[i - 1] == '\\';
+            if (likeWildcard && !escaped) {
+                chars[i] = (chars[i] == '_') ? '?' : '*';
+            }
+        }
+        return new String(chars);
+    }
+
+    /**
+     * Maps an Elasticsearch field type name to the Doris type used to represent it.
+     *
+     * @param esType the Elasticsearch type name, e.g. "keyword" or "long"
+     * @return the matching Doris type, or Type.INVALID for any name not listed below
+     **/
+    public static Type toDorisType(String esType) {
+        // reference https://www.elastic.co/guide/en/elasticsearch/reference/8.3/sql-data-types.html
+        final Type dorisType;
+        switch (esType) {
+            case "null":
+                dorisType = Type.NULL;
+                break;
+            case "boolean":
+                dorisType = Type.BOOLEAN;
+                break;
+            case "byte":
+                dorisType = Type.TINYINT;
+                break;
+            case "short":
+                dorisType = Type.SMALLINT;
+                break;
+            case "integer":
+                dorisType = Type.INT;
+                break;
+            // NOTE(review): unsigned_long shares BIGINT with long - confirm the value range fits.
+            case "long":
+            case "unsigned_long":
+                dorisType = Type.BIGINT;
+                break;
+            case "float":
+            case "half_float":
+                dorisType = Type.FLOAT;
+                break;
+            case "double":
+            case "scaled_float":
+                dorisType = Type.DOUBLE;
+                break;
+            // Textual and structured types are all exposed as strings.
+            case "keyword":
+            case "text":
+            case "ip":
+            case "nested":
+            case "object":
+                dorisType = Type.STRING;
+                break;
+            case "date":
+                dorisType = Type.DATE;
+                break;
+            default:
+                dorisType = Type.INVALID;
+                break;
+        }
+        return dorisType;
+    }
+
+    /**
+     * Extracts the plain Java value carried by a literal expression.
+     * Returns null when the expr is not a literal or is a literal kind not handled below.
+     **/
+    private static Object toDorisLiteral(Expr expr) {
+        if (!expr.isLiteral()) {
+            return null;
+        }
+        if (expr instanceof BoolLiteral) {
+            return ((BoolLiteral) expr).getValue();
+        }
+        if (expr instanceof DateLiteral) {
+            return ((DateLiteral) expr).getLongValue();
+        }
+        if (expr instanceof DecimalLiteral) {
+            return ((DecimalLiteral) expr).getValue();
+        }
+        if (expr instanceof FloatLiteral) {
+            return ((FloatLiteral) expr).getValue();
+        }
+        if (expr instanceof IntLiteral) {
+            return ((IntLiteral) expr).getValue();
+        }
+        if (expr instanceof LargeIntLiteral) {
+            // NOTE(review): read through getLongValue() - confirm large values are not truncated.
+            return ((LargeIntLiteral) expr).getLongValue();
+        }
+        if (expr instanceof StringLiteral) {
+            return ((StringLiteral) expr).getStringValue();
+        }
+        return null;
+    }
+
+    /**
+     * Generate url for be to query es.
+     **/
+    public static EsUrls genEsUrls(String index, String type, boolean docValueMode, long limit, long batchSize) {
+        String filterPath = docValueMode ? "filter_path=_scroll_id,hits.total,hits.hits._score,hits.hits.fields"
+                : "filter_path=_scroll_id,hits.hits._source,hits.total,hits.hits._id";
+        if (limit <= 0) {
+            StringBuilder initScrollUrl = new StringBuilder();
+            StringBuilder nextScrollUrl = new StringBuilder();
+            initScrollUrl.append("/").append(index);
+            if (StringUtils.isNotBlank(type)) {
+                initScrollUrl.append("/").append(type);
+            }
+            initScrollUrl.append("/_search?scroll=5m&").append(filterPath).append("&terminate_after=")

Review Comment:
   Should the `5m` be configurable?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org