You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/07/12 01:44:25 UTC

[GitHub] [doris] xiaokang commented on a diff in pull request #10322: [feature](JSON datatype)Support JSON datatype

xiaokang commented on code in PR #10322:
URL: https://github.com/apache/doris/pull/10322#discussion_r918439471


##########
be/src/common/config.h:
##########
@@ -744,6 +744,11 @@ CONF_mInt32(string_type_length_soft_limit_bytes, "1048576");
 CONF_Validator(string_type_length_soft_limit_bytes,
                [](const int config) -> bool { return config > 0 && config <= 2147483643; });
 
+CONF_mInt32(json_type_length_soft_limit_bytes, "1048576");

Review Comment:
   how is soft limit used?



##########
be/src/common/env_config.h.in:
##########
@@ -19,6 +19,6 @@
 
 namespace doris {
 
-#cmakedefine HAVE_SCHED_GETCPU @HAVE_SCHED_GETCPU@
+#cmakedefine HAVE_SCHED_GETCPU @HAVE_SCHED_GETCPU @

Review Comment:
   a mistake?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java:
##########
@@ -648,22 +679,29 @@ public static boolean isImplicitCast(PrimitiveType type, PrimitiveType target) {
 
         compatibilityMatrix[CHAR.ordinal()][CHAR.ordinal()] = CHAR;
         compatibilityMatrix[CHAR.ordinal()][VARCHAR.ordinal()] = VARCHAR;
+        compatibilityMatrix[CHAR.ordinal()][JSON.ordinal()] = JSON;  
         compatibilityMatrix[CHAR.ordinal()][STRING.ordinal()] = STRING;
         compatibilityMatrix[CHAR.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][TIME.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][TIMEV2.ordinal()] = INVALID_TYPE;
 
         compatibilityMatrix[VARCHAR.ordinal()][VARCHAR.ordinal()] = VARCHAR;
         compatibilityMatrix[VARCHAR.ordinal()][STRING.ordinal()] = STRING;
+        compatibilityMatrix[VARCHAR.ordinal()][JSON.ordinal()] = JSON;  
         compatibilityMatrix[VARCHAR.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[VARCHAR.ordinal()][TIME.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[VARCHAR.ordinal()][TIMEV2.ordinal()] = INVALID_TYPE;
 
         compatibilityMatrix[STRING.ordinal()][STRING.ordinal()] = STRING;
         compatibilityMatrix[STRING.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE;
+        compatibilityMatrix[STRING.ordinal()][JSON.ordinal()] = STRING; 
         compatibilityMatrix[STRING.ordinal()][TIME.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[STRING.ordinal()][TIMEV2.ordinal()] = INVALID_TYPE;
 
+        compatibilityMatrix[JSON.ordinal()][JSON.ordinal()] = JSON;
+        compatibilityMatrix[JSON.ordinal()][STRING.ordinal()] = STRING;
+        compatibilityMatrix[JSON.ordinal()][VARCHAR.ordinal()] = VARCHAR;

Review Comment:
   CHAR is missing



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java:
##########
@@ -648,22 +679,29 @@ public static boolean isImplicitCast(PrimitiveType type, PrimitiveType target) {
 
         compatibilityMatrix[CHAR.ordinal()][CHAR.ordinal()] = CHAR;
         compatibilityMatrix[CHAR.ordinal()][VARCHAR.ordinal()] = VARCHAR;
+        compatibilityMatrix[CHAR.ordinal()][JSON.ordinal()] = JSON;  

Review Comment:
   According to the definition of compatibilityMatrix: 'A value of any of the two types could be assigned to a slot of the assignment-compatible type without loss of precision', the slot value of CHAR and JSON should be CHAR.



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java:
##########
@@ -666,6 +666,9 @@ public String getSignatureString(Map<PrimitiveType, String> typeStringMap) {
             case VARCHAR:
                 sb.append(String.format(typeStringMap.get(dataType), getStrLen()));
                 break;
+            case JSON:
+                sb.append(String.format(typeStringMap.get(dataType), getStrLen()));

Review Comment:
   format is for VARCHAR(length) but not necessary for JSON



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java:
##########
@@ -1270,6 +1300,18 @@ public static Type getCmpType(Type t1, Type t2) {
                 || (t1ResultType == PrimitiveType.VARCHAR && t2ResultType == PrimitiveType.STRING)) {
             return Type.STRING;
         }
+        // TODO(wzy): support NUMERIC/CHAR cast to JSON
+        if (t1ResultType == PrimitiveType.JSON && t2ResultType == PrimitiveType.JSON) {
+            return Type.JSON;
+        }
+        if ((t1ResultType == PrimitiveType.JSON && t2ResultType == PrimitiveType.VARCHAR) 
+                || (t1ResultType == PrimitiveType.VARCHAR && t2ResultType == PrimitiveType.JSON)){
+            return Type.VARCHAR;
+        }
+        if ((t1ResultType == PrimitiveType.JSON && t2ResultType == PrimitiveType.STRING) 
+                || (t1ResultType == PrimitiveType.STRING && t2ResultType == PrimitiveType.JSON)){
+            return Type.SMALLINT;

Review Comment:
   should be STRING?



##########
be/src/vec/common/string_ref.h:
##########
@@ -73,6 +75,18 @@ struct StringRef {
     static StringRef from_string_val(StringVal sv) {
         return StringRef(reinterpret_cast<char*>(sv.ptr), sv.len);
     }
+
+    doris::JsonbDocument& to_jsonb_doc() const {

Review Comment:
   json related util function should be in a json util class instead of StringRef.



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java:
##########
@@ -256,6 +266,7 @@ public enum PrimitiveType {
         builder.put(DATETIME, DATETIMEV2);
         builder.put(DATETIME, DECIMALV2);
         builder.put(DATETIME, VARCHAR);
+        builder.put(DATETIME, JSON);

Review Comment:
   DATEV2, DATETIMEV2 should also be handled



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java:
##########
@@ -75,7 +75,7 @@ public abstract class Type {
     public static final ScalarType DEFAULT_DATETIMEV2 = ScalarType.createDatetimeV2Type(0);
     public static final ScalarType DATETIMEV2 = DEFAULT_DATETIMEV2;
     public static final ScalarType DEFAULT_TIMEV2 = ScalarType.createTimeV2Type(0);
-    public static final ScalarType DECIMALV2 = DEFAULT_DECIMALV2;

Review Comment:
   should not delete DECIMALV2



##########
gensrc/thrift/Types.thrift:
##########
@@ -74,6 +74,7 @@ enum TPrimitiveType {
   CHAR,
   LARGEINT,
   VARCHAR,
+  JSON,

Review Comment:
   it's better to add the new type to the tail of the enum.



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java:
##########
@@ -1124,6 +1139,7 @@ public Integer getNumPrecRadix() {
         compatibilityMatrix[CHAR.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][STRING.ordinal()] = PrimitiveType.STRING;
+        compatibilityMatrix[CHAR.ordinal()][JSON.ordinal()] = PrimitiveType.INVALID_TYPE;

Review Comment:
   it's not consistent with TPrimitiveType



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java:
##########
@@ -648,22 +679,29 @@ public static boolean isImplicitCast(PrimitiveType type, PrimitiveType target) {
 
         compatibilityMatrix[CHAR.ordinal()][CHAR.ordinal()] = CHAR;
         compatibilityMatrix[CHAR.ordinal()][VARCHAR.ordinal()] = VARCHAR;
+        compatibilityMatrix[CHAR.ordinal()][JSON.ordinal()] = JSON;  
         compatibilityMatrix[CHAR.ordinal()][STRING.ordinal()] = STRING;
         compatibilityMatrix[CHAR.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][TIME.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][TIMEV2.ordinal()] = INVALID_TYPE;
 
         compatibilityMatrix[VARCHAR.ordinal()][VARCHAR.ordinal()] = VARCHAR;
         compatibilityMatrix[VARCHAR.ordinal()][STRING.ordinal()] = STRING;
+        compatibilityMatrix[VARCHAR.ordinal()][JSON.ordinal()] = JSON;  

Review Comment:
   just as CHAR



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java:
##########
@@ -496,6 +517,7 @@ public static boolean isImplicitCast(PrimitiveType type, PrimitiveType target) {
         compatibilityMatrix[BOOLEAN.ordinal()][DATETIMEV2.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[BOOLEAN.ordinal()][CHAR.ordinal()] = INVALID_TYPE;
         compatibilityMatrix[BOOLEAN.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE;
+        compatibilityMatrix[BOOLEAN.ordinal()][JSON.ordinal()] = INVALID_TYPE;  

Review Comment:
   it's not consistent with implicitCastMap



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java:
##########
@@ -1098,6 +1112,7 @@ public Integer getNumPrecRadix() {
         compatibilityMatrix[DATETIME.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATETIME.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATETIME.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DATETIME.ordinal()][JSON.ordinal()] = PrimitiveType.INVALID_TYPE;

Review Comment:
   DATEV2 and DATETIMEV2 are missing



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java:
##########
@@ -164,6 +164,8 @@ public static ScalarFunction createBuiltinOperator(
                 case QUANTILE_STATE:
                     beFn += "_string_val";
                     break;
+                case JSON:
+                    beFn += "json_val";

Review Comment:
   non-vectorized function can be ignored



##########
fe/fe-core/src/main/java/org/apache/doris/analysis/JsonLiteral.java:
##########
@@ -0,0 +1,197 @@
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.Objects;
+
+import com.google.common.base.Preconditions;
+import com.google.gson.JsonParser;
+import com.google.gson.JsonSyntaxException;
+
+import org.apache.doris.common.io.Text;
+import org.apache.doris.thrift.TExprNode;
+import org.apache.doris.thrift.TExprNodeType;
+import org.apache.doris.thrift.TJsonLiteral;
+import org.apache.logging.log4j.LogManager;
+import org.apache.doris.catalog.Type;
+import org.apache.logging.log4j.Logger;
+
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.ErrorCode;
+import org.apache.doris.common.ErrorReport;
+import org.apache.doris.qe.VariableVarConverters;
+
+public class JsonLiteral extends LiteralExpr {
+    private static final Logger LOG = LogManager.getLogger(JsonLiteral.class);
+    private JsonParser parser = new JsonParser();
+    private String value;
+    // Means the converted session variable need to be cast to int, such as "cast 'STRICT_TRANS_TABLES' to Integer".
+    private String beConverted = "";
+
+    public JsonLiteral() {
+        super();
+        type = Type.JSON;
+    }
+
+    public JsonLiteral(String value) throws AnalysisException {
+        try {
+            parser.parse(value);

Review Comment:
   poor performance to parse json in fe



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java:
##########
@@ -164,6 +164,8 @@ public static ScalarFunction createBuiltinOperator(
                 case QUANTILE_STATE:
                     beFn += "_string_val";
                     break;
+                case JSON:
+                    beFn += "json_val";

Review Comment:
   _json_val



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java:
##########
@@ -1145,7 +1162,14 @@ public Integer getNumPrecRadix() {
         compatibilityMatrix[STRING.ordinal()][DATETIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[STRING.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[STRING.ordinal()][QUANTILE_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[STRING.ordinal()][JSON.ordinal()] = PrimitiveType.STRING;
 
+        //JSON
+        compatibilityMatrix[JSON.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[JSON.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[JSON.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[JSON.ordinal()][QUANTILE_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[JSON.ordinal()][STRING.ordinal()] = PrimitiveType.STRING;

Review Comment:
   what about CHAR VARCHAR and many other types?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java:
##########
@@ -94,6 +94,7 @@ public abstract class ColumnType {
         schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true;
         schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.DATE.ordinal()] = true;
         schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.STRING.ordinal()] = true;
+        schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.JSON.ordinal()] = true;

Review Comment:
   if VARCHAR is allowed, STRING should also be allowed



##########
fe/fe-core/src/main/java/org/apache/doris/analysis/JsonLiteral.java:
##########
@@ -0,0 +1,197 @@
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.Objects;
+
+import com.google.common.base.Preconditions;
+import com.google.gson.JsonParser;
+import com.google.gson.JsonSyntaxException;
+
+import org.apache.doris.common.io.Text;
+import org.apache.doris.thrift.TExprNode;
+import org.apache.doris.thrift.TExprNodeType;
+import org.apache.doris.thrift.TJsonLiteral;
+import org.apache.logging.log4j.LogManager;
+import org.apache.doris.catalog.Type;
+import org.apache.logging.log4j.Logger;
+
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.ErrorCode;
+import org.apache.doris.common.ErrorReport;
+import org.apache.doris.qe.VariableVarConverters;
+
+public class JsonLiteral extends LiteralExpr {
+    private static final Logger LOG = LogManager.getLogger(JsonLiteral.class);
+    private JsonParser parser = new JsonParser();
+    private String value;
+    // Means the converted session variable need to be cast to int, such as "cast 'STRICT_TRANS_TABLES' to Integer".
+    private String beConverted = "";
+
+    public JsonLiteral() {
+        super();
+        type = Type.JSON;
+    }
+
+    public JsonLiteral(String value) throws AnalysisException {
+        try {
+            parser.parse(value);
+        } catch (JsonSyntaxException e) {
+            throw new AnalysisException("Invalid json literal: " + e.getMessage());
+        }
+        this.value = value;
+        type = Type.JSON;
+        analysisDone();
+    }
+
+    protected JsonLiteral(JsonLiteral other) {
+        super(other);
+        value = other.value;
+    }
+
+    public void setBeConverted(String val) {
+        this.beConverted = val;
+    }
+
+    @Override
+    public Expr clone() {
+        return new JsonLiteral(this);
+    }
+
+    @Override
+    public int compareLiteral(LiteralExpr expr) {
+        if (expr instanceof NullLiteral) {
+            return 1;
+        }
+        if (expr == MaxLiteral.MAX_VALUE) {
+            return -1;
+        }
+        // compare string with utf-8 byte array, same with DM,BE,StorageEngine
+        byte[] thisBytes = null;
+        byte[] otherBytes = null;
+        try {
+            thisBytes = value.getBytes("UTF-8");
+            otherBytes = expr.getStringValue().getBytes("UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            Preconditions.checkState(false);
+        }
+
+        int minLength = Math.min(thisBytes.length, otherBytes.length);
+        int i = 0;
+        for (i = 0; i < minLength; i++) {
+            if (thisBytes[i] < otherBytes[i]) {
+                return -1;
+            } else if (thisBytes[i] > otherBytes[i]) {
+                return 1;
+            }
+        }
+        if (thisBytes.length > otherBytes.length) {
+            if (thisBytes[i] == 0x00) {
+                return 0;
+            } else {
+                return 1;
+            }
+        } else if (thisBytes.length < otherBytes.length) {
+            if (otherBytes[i] == 0x00) {
+                return 0;
+            } else {
+                return -1;
+            }
+        } else {
+            return 0;
+        }
+    }
+
+    public String getValue() {
+        return value;
+    }
+
+    @Override
+    public boolean isMinValue() {
+        return false;
+    }
+
+    @Override
+    public String toSqlImpl() {
+        return "'" + value.replaceAll("'", "''") + "'";
+    }
+
+    @Override
+    protected void toThrift(TExprNode msg) {
+        msg.node_type = TExprNodeType.JSON_LITERAL;
+        msg.json_literal = new TJsonLiteral(getUnescapedValue());
+    }
+
+    public String getUnescapedValue() {
+        // Unescape string exactly like Hive does. Hive's method assumes
+        // quotes so we add them here to reuse Hive's code.
+        return value;
+    }
+
+    public String getJsonValue() {
+        return value;
+    }
+
+    @Override
+    public long getLongValue() {
+        return 0;

Review Comment:
   should throw AnalysisException if not supported at present



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org