You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2021/05/08 02:52:53 UTC

[incubator-pinot] branch master updated: JSON column datatype support. (#6878)

This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new bd4239f  JSON column datatype support. (#6878)
bd4239f is described below

commit bd4239fc6908096f60ead9f1ee2c3576f256618b
Author: Amrish Lal <am...@gmail.com>
AuthorDate: Fri May 7 19:52:24 2021 -0700

    JSON column datatype support. (#6878)
    
    * JSON column datatype support.
    
    * code review changes + additional test cases.
    
    * code review changes.
    
    * Cleanup.
    
    * Rebuild.
    
    * Adjust for UTC based timestamp.
    
    * Adjust for UTC based timestamp.
    
    * Adjust for UTC based timestamp.
    
    * Cleanup.
    
    * code review changes.
    
    * Fix test case.
    
    * add TODO comment.
    
    * add TODO comment.
    
    * Rebuild.
    
    * Rebuild.
---
 .../org/apache/pinot/common/utils/DataSchema.java  |  9 ++
 .../apache/pinot/common/utils/PinotDataType.java   | 86 ++++++++++++++++++-
 .../apache/pinot/common/data/FieldSpecTest.java    | 12 +++
 .../pinot/common/utils/PinotDataTypeTest.java      | 98 ++++++++++++++++------
 .../transform/function/BaseTransformFunction.java  |  2 +
 .../transform/function/CastTransformFunction.java  |  3 +
 ...tchPredicateTest.java => JsonDatatypeTest.java} | 58 +++++--------
 .../pinot/queries/JsonMatchPredicateTest.java      |  3 +
 .../ColumnMinMaxValueGenerator.java                |  8 +-
 .../recordtransformer/RecordTransformerTest.java   |  7 ++
 .../java/org/apache/pinot/spi/data/FieldSpec.java  |  9 ++
 .../java/org/apache/pinot/spi/data/Schema.java     |  1 +
 .../java/org/apache/pinot/spi/utils/JsonUtils.java |  1 +
 13 files changed, 230 insertions(+), 67 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
index 409fd80..b871c0e 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
@@ -243,6 +243,7 @@ public class DataSchema {
     BOOLEAN /* Stored as INT */,
     TIMESTAMP /* Stored as LONG */,
     STRING,
+    JSON /* Stored as STRING */,
     BYTES,
     OBJECT,
     INT_ARRAY,
@@ -260,6 +261,8 @@ public class DataSchema {
           return INT;
         case TIMESTAMP:
           return LONG;
+        case JSON:
+          return STRING;
         default:
           return this;
       }
@@ -308,6 +311,8 @@ public class DataSchema {
           return DataType.TIMESTAMP;
         case STRING:
           return DataType.STRING;
+        case JSON:
+          return DataType.JSON;
         case BYTES:
           return DataType.BYTES;
         default:
@@ -334,6 +339,7 @@ public class DataSchema {
         case TIMESTAMP:
           return new Timestamp((Long) value);
         case STRING:
+        case JSON:
           return value.toString();
         case BYTES:
           return ((ByteArray) value).getBytes();
@@ -421,6 +427,7 @@ public class DataSchema {
         case TIMESTAMP:
           return new Timestamp((Long) value).toString();
         case STRING:
+        case JSON:
           return value.toString();
         case BYTES:
           return ((ByteArray) value).toHexString();
@@ -495,6 +502,8 @@ public class DataSchema {
           return TIMESTAMP;
         case STRING:
           return STRING;
+        case JSON:
+          return JSON;
         case BYTES:
           return BYTES;
         default:
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java
index ee9b02e..43eaf30 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java
@@ -19,12 +19,14 @@
 package org.apache.pinot.common.utils;
 
 import java.sql.Timestamp;
+import java.util.Base64;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.utils.BooleanUtils;
 import org.apache.pinot.spi.utils.BytesUtils;
+import org.apache.pinot.spi.utils.JsonUtils;
 import org.apache.pinot.spi.utils.TimestampUtils;
 
 
@@ -538,6 +540,60 @@ public enum PinotDataType {
     }
   },
 
+  JSON {
+    @Override
+    public int toInt(Object value) {
+      return Integer.parseInt(value.toString().trim());
+    }
+
+    @Override
+    public long toLong(Object value) {
+      return Long.parseLong(value.toString().trim());
+    }
+
+    @Override
+    public float toFloat(Object value) {
+      return Float.parseFloat(value.toString());
+    }
+
+    @Override
+    public double toDouble(Object value) {
+      return Double.parseDouble(value.toString());
+    }
+
+    @Override
+    public boolean toBoolean(Object value) {
+      return Boolean.parseBoolean(value.toString().trim());
+    }
+
+    @Override
+    public Timestamp toTimestamp(Object value) {
+      return TimestampUtils.toTimestamp(value.toString().trim());
+    }
+
+    @Override
+    public String toString(Object value) {
+      return value.toString();
+    }
+
+    @Override
+    public byte[] toBytes(Object value) {
+      // Base64 encoding is the commonly used mechanism for encoding binary data in JSON documents. Note that
+      // toJson function converts byte[] into a Base64 encoded json string value.
+      try {
+        return Base64.getDecoder().decode(value.toString());
+      } catch (Exception e) {
+        throw new RuntimeException(
+            "Unable to convert JSON base64 encoded string value to BYTES. Input value: " + value, e);
+      }
+    }
+
+    @Override
+    public String convert(Object value, PinotDataType sourceType) {
+      return sourceType.toJson(value);
+    }
+  },
+
   BYTES {
     @Override
     public int toInt(Object value) {
@@ -717,7 +773,8 @@ public enum PinotDataType {
   OBJECT_ARRAY;
 
   /**
-   * NOTE: override toInt(), toLong(), toFloat(), toDouble(), toBoolean(), toTimestamp(), toString() and toBytes() for single-value types.
+   * NOTE: override toInt(), toLong(), toFloat(), toDouble(), toBoolean(), toTimestamp(), toString(), and
+   * toBytes() for single-value types.
    */
 
   public int toInt(Object value) {
@@ -748,6 +805,23 @@ public enum PinotDataType {
     return getSingleValueType().toString(toObjectArray(value)[0]);
   }
 
+
+  public String toJson(Object value) {
+    if (value instanceof String) {
+      try {
+        return JsonUtils.stringToJsonNode((String) value).toString();
+      } catch (Exception e) {
+        throw new RuntimeException("Unable to convert String into JSON. Input value: " + value, e);
+      }
+    } else {
+      try {
+        return JsonUtils.objectToString(value).toString();
+      } catch (Exception e) {
+        throw new RuntimeException("Unable to convert " + value.getClass().getCanonicalName() + " to JSON. Input value: " + value, e);
+      }
+    }
+  }
+
   public byte[] toBytes(Object value) {
     return getSingleValueType().toBytes(toObjectArray(value)[0]);
   }
@@ -936,7 +1010,7 @@ public enum PinotDataType {
   }
 
   public Object convert(Object value, PinotDataType sourceType) {
-    throw new UnsupportedOperationException("Cannot convert value form " + sourceType + " to " + this);
+    throw new UnsupportedOperationException("Cannot convert value from " + sourceType + " to " + this);
   }
 
   /**
@@ -1011,6 +1085,12 @@ public enum PinotDataType {
         } else {
           throw new IllegalStateException("There is no multi-value type for TIMESTAMP");
         }
+      case JSON:
+        if (fieldSpec.isSingleValueField()) {
+          return PinotDataType.JSON;
+        } else {
+          throw new IllegalStateException("There is no multi-value type for JSON");
+        }
       case STRING:
         return fieldSpec.isSingleValueField() ? PinotDataType.STRING : PinotDataType.STRING_ARRAY;
       case BYTES:
@@ -1045,6 +1125,8 @@ public enum PinotDataType {
         return TIMESTAMP;
       case STRING:
         return STRING;
+      case JSON:
+        return JSON;
       case BYTES:
         return BYTES;
       case INT_ARRAY:
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
index aa7d31e..a020ebc 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
@@ -58,6 +58,7 @@ public class FieldSpecTest {
     Assert.assertEquals(BOOLEAN.getStoredType(), INT);
     Assert.assertEquals(TIMESTAMP.getStoredType(), LONG);
     Assert.assertEquals(STRING.getStoredType(), STRING);
+    Assert.assertEquals(JSON.getStoredType(), STRING);
     Assert.assertEquals(BYTES.getStoredType(), BYTES);
 
     Assert.assertEquals(INT.size(), Integer.BYTES);
@@ -106,6 +107,17 @@ public class FieldSpecTest {
     Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode());
     Assert.assertEquals(fieldSpec1.getDefaultNullValue(), "null");
 
+    // Single-value json type dimension field with max length and default null value.
+    fieldSpec1 = new DimensionFieldSpec();
+    fieldSpec1.setName("svDimension");
+    fieldSpec1.setDataType(JSON);
+    fieldSpec1.setMaxLength(20000);
+    fieldSpec2 = new DimensionFieldSpec("svDimension", JSON, true, 20000, null);
+    Assert.assertEquals(fieldSpec1, fieldSpec2);
+    Assert.assertEquals(fieldSpec1.toString(), fieldSpec2.toString());
+    Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode());
+    Assert.assertEquals(fieldSpec1.getDefaultNullValue(), "null");
+
     // Multi-value dimension field.
     fieldSpec1 = new DimensionFieldSpec();
     fieldSpec1.setName("mvDimension");
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java
index ca75097..2970b4b 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java
@@ -18,6 +18,11 @@
  */
 package org.apache.pinot.common.utils;
 
+import java.sql.Timestamp;
+import java.time.LocalDateTime;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 import org.testng.annotations.Test;
 
 import static org.apache.pinot.common.utils.PinotDataType.*;
@@ -28,16 +33,16 @@ import static org.testng.Assert.fail;
 
 public class PinotDataTypeTest {
   private static final PinotDataType[] SOURCE_TYPES =
-      {BYTE, CHARACTER, SHORT, INTEGER, LONG, FLOAT, DOUBLE, STRING, BYTE_ARRAY, CHARACTER_ARRAY, SHORT_ARRAY, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY, STRING_ARRAY};
+      {BYTE, CHARACTER, SHORT, INTEGER, LONG, FLOAT, DOUBLE, STRING, JSON, BYTE_ARRAY, CHARACTER_ARRAY, SHORT_ARRAY, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY, STRING_ARRAY};
   private static final Object[] SOURCE_VALUES =
-      {(byte) 123, (char) 123, (short) 123, 123, 123L, 123f, 123d, " 123", new Object[]{(byte) 123}, new Object[]{(char) 123}, new Object[]{(short) 123}, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}, new Object[]{" 123"}};
+      {(byte) 123, (char) 123, (short) 123, 123, 123L, 123f, 123d, " 123", "123 ", new Object[]{(byte) 123}, new Object[]{(char) 123}, new Object[]{(short) 123}, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}, new Object[]{" 123"}};
   private static final PinotDataType[] DEST_TYPES =
       {INTEGER, LONG, FLOAT, DOUBLE, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY};
   private static final Object[] EXPECTED_DEST_VALUES =
       {123, 123L, 123f, 123d, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}};
   private static final String[] EXPECTED_STRING_VALUES =
       {Byte.toString((byte) 123), Character.toString((char) 123), Short.toString((short) 123), Integer.toString(
-          123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123", Byte.toString(
+          123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123", "123 ", Byte.toString(
           (byte) 123), Character.toString((char) 123), Short.toString((short) 123), Integer.toString(
           123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123"};
 
@@ -76,6 +81,9 @@ public class PinotDataTypeTest {
     assertEquals(DOUBLE.convert(false, BOOLEAN), 0d);
     assertEquals(STRING.convert(true, BOOLEAN), "true");
     assertEquals(STRING.convert(false, BOOLEAN), "false");
+
+    assertEquals(BOOLEAN.convert("true", JSON), true);
+    assertEquals(BOOLEAN.convert("false", JSON), false);
   }
 
   @Test
@@ -89,11 +97,30 @@ public class PinotDataTypeTest {
     assertEquals(STRING.convert(new byte[]{0, 1}, BYTES), "0001");
     assertEquals(BYTES.convert("0001", STRING), new byte[]{0, 1});
     assertEquals(BYTES.convert(new byte[]{0, 1}, BYTES), new byte[]{0, 1});
+    assertEquals(BYTES.convert("AAE=", JSON), new byte[]{0,1});
     assertEquals(BYTES.convert(new Byte[]{0, 1}, BYTE_ARRAY), new byte[]{0, 1});
     assertEquals(BYTES.convert(new String[]{"0001"}, STRING_ARRAY), new byte[]{0, 1});
   }
 
   @Test
+  public void testTimestamp() {
+    Timestamp timestamp = Timestamp.valueOf(LocalDateTime.now());
+    assertEquals(TIMESTAMP.convert(timestamp.getTime(), LONG), timestamp);
+    assertEquals(TIMESTAMP.convert(timestamp.toString(), STRING), timestamp);
+    assertEquals(TIMESTAMP.convert(timestamp.getTime(), JSON), timestamp);
+    assertEquals(TIMESTAMP.convert(timestamp.toString(), JSON), timestamp);
+  }
+
+  @Test
+  public void testJSON() {
+    assertEquals(JSON.convert(false, BOOLEAN), "false");
+    assertEquals(JSON.convert(true, BOOLEAN), "true");
+    assertEquals(JSON.convert(new byte[]{0, 1}, BYTES), "\"AAE=\""); // Base64 encoding.
+    assertEquals(JSON.convert("{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}", STRING), "{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}");
+    assertEquals(JSON.convert(new Timestamp(1620324238610l), TIMESTAMP), "1620324238610");
+  }
+
+  @Test
   public void testObject() {
     assertEquals(OBJECT.toInt(new NumberObject("123")), 123);
     assertEquals(OBJECT.toLong(new NumberObject("123")), 123L);
@@ -102,43 +129,64 @@ public class PinotDataTypeTest {
     assertTrue(OBJECT.toBoolean(new NumberObject("123")));
     assertEquals(OBJECT.toTimestamp(new NumberObject("123")).getTime(), 123L);
     assertEquals(OBJECT.toString(new NumberObject("123")), "123");
+    assertEquals(OBJECT.toJson(getGenericTestObject()), "{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}");
     assertEquals(OBJECT_ARRAY.getSingleValueType(), OBJECT);
   }
 
+  private static Object getGenericTestObject() {
+    Map<String, Object> map1 = new HashMap<>();
+    map1.put("array", Arrays.asList(-5.4,4, "2"));
+    map1.put("key1", "value");
+    map1.put("key2", null);
+
+    Map<String, Object> map2 = new HashMap<>();
+    map2.put("map", map1);
+    map2.put("bytes", new byte[]{0,1});
+    map2.put("timestamp", new Timestamp(1620324238610l));
+
+    return map2;
+  }
+
   @Test
   public void testInvalidConversion() {
     for (PinotDataType sourceType : values()) {
-      if (sourceType.isSingleValue() && sourceType != STRING && sourceType != BYTES) {
-        assertInvalidConversion(sourceType, BYTES);
+      if (sourceType.isSingleValue() && sourceType != STRING && sourceType != BYTES && sourceType != JSON) {
+        assertInvalidConversion(null, sourceType, BYTES, UnsupportedOperationException.class);
       }
     }
 
-    assertInvalidConversion(BYTES, INTEGER);
-    assertInvalidConversion(BYTES, LONG);
-    assertInvalidConversion(BYTES, FLOAT);
-    assertInvalidConversion(BYTES, DOUBLE);
-    assertInvalidConversion(BYTES, INTEGER_ARRAY);
-    assertInvalidConversion(BYTES, LONG_ARRAY);
-    assertInvalidConversion(BYTES, FLOAT_ARRAY);
-    assertInvalidConversion(BYTES, DOUBLE_ARRAY);
+    assertInvalidConversion(null, BYTES, INTEGER, UnsupportedOperationException.class);
+    assertInvalidConversion(null, BYTES, LONG, UnsupportedOperationException.class);
+    assertInvalidConversion(null, BYTES, FLOAT, UnsupportedOperationException.class);
+    assertInvalidConversion(null, BYTES, DOUBLE, UnsupportedOperationException.class);
+    assertInvalidConversion(null, BYTES, INTEGER_ARRAY, UnsupportedOperationException.class);
+    assertInvalidConversion(null, BYTES, LONG_ARRAY, UnsupportedOperationException.class);
+    assertInvalidConversion(null, BYTES, FLOAT_ARRAY, UnsupportedOperationException.class);
+    assertInvalidConversion(null, BYTES, DOUBLE_ARRAY, UnsupportedOperationException.class);
 
     for (PinotDataType sourceType : values()) {
-      assertInvalidConversion(sourceType, BYTE);
-      assertInvalidConversion(sourceType, CHARACTER);
-      assertInvalidConversion(sourceType, SHORT);
-      assertInvalidConversion(sourceType, OBJECT);
-      assertInvalidConversion(sourceType, BYTE_ARRAY);
-      assertInvalidConversion(sourceType, CHARACTER_ARRAY);
-      assertInvalidConversion(sourceType, SHORT_ARRAY);
-      assertInvalidConversion(sourceType, OBJECT_ARRAY);
+      assertInvalidConversion(null, sourceType, BYTE, UnsupportedOperationException.class);
+      assertInvalidConversion(null, sourceType, CHARACTER, UnsupportedOperationException.class);
+      assertInvalidConversion(null, sourceType, SHORT, UnsupportedOperationException.class);
+      assertInvalidConversion(null, sourceType, OBJECT, UnsupportedOperationException.class);
+      assertInvalidConversion(null, sourceType, BYTE_ARRAY, UnsupportedOperationException.class);
+      assertInvalidConversion(null, sourceType, CHARACTER_ARRAY, UnsupportedOperationException.class);
+      assertInvalidConversion(null, sourceType, SHORT_ARRAY, UnsupportedOperationException.class);
+      assertInvalidConversion(null, sourceType, OBJECT_ARRAY, UnsupportedOperationException.class);
     }
+
+    assertInvalidConversion("xyz", STRING, JSON, RuntimeException.class);
+
   }
 
-  private void assertInvalidConversion(PinotDataType sourceType, PinotDataType destType) {
+  private void assertInvalidConversion(Object value, PinotDataType sourceType, PinotDataType destType,
+      Class expectedExceptionType) {
     try {
-      destType.convert(null, sourceType);
-    } catch (UnsupportedOperationException e) {
-      return;
+      destType.convert(value, sourceType);
+    } catch (Exception e) {
+      if (e.getClass().equals(expectedExceptionType)) {
+        return;
+      }
     }
     fail();
   }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java
index b504202..66e9111 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java
@@ -47,6 +47,8 @@ public abstract class BaseTransformFunction implements TransformFunction {
       new TransformResultMetadata(DataType.STRING, true, false);
   protected static final TransformResultMetadata STRING_MV_NO_DICTIONARY_METADATA =
       new TransformResultMetadata(DataType.STRING, false, false);
+  protected static final TransformResultMetadata JSON_SV_NO_DICTIONARY_METADATA =
+      new TransformResultMetadata(DataType.JSON, true, false);
   protected static final TransformResultMetadata BYTES_SV_NO_DICTIONARY_METADATA =
       new TransformResultMetadata(DataType.BYTES, true, false);
 
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java
index 7d7f96c..8428249 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java
@@ -76,6 +76,9 @@ public class CastTransformFunction extends BaseTransformFunction {
         case "VARCHAR":
           _resultMetadata = STRING_SV_NO_DICTIONARY_METADATA;
           break;
+        case "JSON":
+          _resultMetadata = JSON_SV_NO_DICTIONARY_METADATA;
+          break;
         default:
           throw new IllegalArgumentException("Unable to cast expression to type - " + targetType);
       }
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java
similarity index 84%
copy from pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
copy to pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java
index c85fe86..75a0537 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.utils.DataSchema;
 import org.apache.pinot.core.common.Operator;
 import org.apache.pinot.core.operator.blocks.IntermediateResultsBlock;
 import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
@@ -50,9 +51,9 @@ import org.testng.annotations.Test;
 
 
 /**
- * Test cases verifying evaluation of predicate with expressions that contain numerical values of different types.
+ * Test cases verifying query evaluation against column of type JSON.
  */
-public class JsonMatchPredicateTest extends BaseQueriesTest {
+public class JsonDatatypeTest extends BaseQueriesTest {
   private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "JsonMatchPredicateTest");
   private static final String RAW_TABLE_NAME = "testTable";
   private static final String SEGMENT_NAME = "testSegment";
@@ -63,7 +64,7 @@ public class JsonMatchPredicateTest extends BaseQueriesTest {
   private static final String STRING_COLUMN = "stringColumn";
   private static final Schema SCHEMA =
       new Schema.SchemaBuilder().addSingleValueDimension(INT_COLUMN, FieldSpec.DataType.INT)
-          .addSingleValueDimension(JSON_COLUMN, FieldSpec.DataType.STRING)
+          .addSingleValueDimension(JSON_COLUMN, FieldSpec.DataType.JSON)
           .addSingleValueDimension(STRING_COLUMN, FieldSpec.DataType.STRING).build();
   private static final TableConfig TABLE_CONFIG =
       new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build();
@@ -143,18 +144,34 @@ public class JsonMatchPredicateTest extends BaseQueriesTest {
     _indexSegments = Arrays.asList(immutableSegment, immutableSegment);
   }
 
+  /** Verify result column type of a simple select query against JSON column */
+  @Test
+  public void testSimpleSelectOnJsonColumn() {
+    try {
+      Operator operator = getOperatorForSqlQuery("select jsonColumn FROM testTable");
+      IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock();
+      Collection<Object[]> rows = block.getSelectionResult();
+      Assert.assertEquals(rows.size(), 9);
+      Assert.assertEquals(block.getDataSchema().getColumnDataType(0), DataSchema.ColumnDataType.JSON);
+    } catch (IllegalStateException ise) {
+      Assert.assertTrue(true);
+    }
+  }
+
   /** Test filtering on string value associated with  JSON key*/
   @Test
   public void testExtractScalarWithStringFilter() {
     Operator operator = getOperatorForSqlQuery(
-        "select json_extract_scalar(jsonColumn, '$.name.last', 'STRING') FROM testTable WHERE json_extract_scalar(jsonColumn, '$.name.first', 'STRING') = 'daffy'");
+        "select intColumn, json_extract_scalar(jsonColumn, '$.name.last', 'STRING') FROM testTable WHERE json_extract_scalar(jsonColumn, '$.name.first', 'STRING') = 'daffy'");
     IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock();
     Collection<Object[]> rows = block.getSelectionResult();
     Assert.assertEquals(rows.size(), 1);
 
     Iterator<Object[]> iterator = rows.iterator();
     Assert.assertTrue(iterator.hasNext());
-    Assert.assertEquals(iterator.next()[0], "duck");
+    Object[] row = iterator.next();
+    Assert.assertEquals(row[0], 1);
+    Assert.assertEquals(row[1], "duck");
   }
 
   /** Test filtering on number value associated with  JSON key*/
@@ -282,37 +299,6 @@ public class JsonMatchPredicateTest extends BaseQueriesTest {
     Assert.assertEquals(iterator.next()[0], "goofy");
   }
 
-  /** Evaluate json_extract_scalar over string column that does not contain valid json data. */
-  @Test
-  public void testJsonExtractScalarAgainstInvalidJson() {
-    // json_extract_scalar throws exception since we are trying to parse a non-JSON string.
-    Operator operator1 = getOperatorForSqlQuery(
-        "select count(*) FROM testTable WHERE json_extract_scalar(stringColumn, '$.name.first', 'INT') = 0");
-    try {
-      IntermediateResultsBlock block1 = (IntermediateResultsBlock) operator1.nextBlock();
-      Assert.fail("Expected query to fail with Exception.");
-    } catch (RuntimeException re) {
-      // JSON parsing exception expected.
-    }
-
-    // JSON data is stored in columns of type STRING, so there is nothing preventing the column from storing bad json
-    // string. Bad JSON string in columns will cause json_extract_scalar to throw an exception which would terminate
-    // query processing. However, when json_extract_scalar is used within the WHERE clause, we should return the
-    // default value instead of throwing exception. This will allow the predicate to be evaluated to either true or
-    // false and hence allow the query to complete successfully. Returning default value from json_extract_scalar is
-    // an undocumented feature. Ideally, json_extract_scalar should return NULL when it encounters bad JSON. However,
-    // NULL support is currently pending, so this is the best we can do.
-    Operator operator2 = getOperatorForSqlQuery(
-        "select count(*) FROM testTable WHERE json_extract_scalar(stringColumn, '$.name.first', 'INT', 0) = 0");
-
-    IntermediateResultsBlock block2 = (IntermediateResultsBlock) operator2.nextBlock();
-    Collection<Object[]> rows = block2.getSelectionResult();
-
-    // None of the values in stringColumn are valid JSON. Hence, json_extract_scalar should default to '0' for all rows
-    // and count returned by the query should be 9 (same as number of rows in the table).
-    Assert.assertEquals(block2.getAggregationResult().get(0), 9L);
-  }
-
   @AfterClass
   public void tearDown()
       throws IOException {
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
index c85fe86..9325a2c 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
@@ -51,6 +51,9 @@ import org.testng.annotations.Test;
 
 /**
  * Test cases verifying evaluation of predicate with expressions that contain numerical values of different types.
+ * TODO: Update these test cases to: 1) use V2 JSON_MATCH function, 2) use multi-dimensional JSON array addressing,
+ * 3) do json_extract_scalar on a column other than the JSON_MATCH column, 4) query deeper levels of nesting, and
+ * 5) add test cases for GROUP BY on json_extract_scalar or path expressions.
  */
 public class JsonMatchPredicateTest extends BaseQueriesTest {
   private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "JsonMatchPredicateTest");
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
index d3bca59..8549893 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
@@ -64,7 +64,7 @@ public class ColumnMinMaxValueGenerator {
     Set<String> columnsToAddMinMaxValue = new HashSet<>(schema.getPhysicalColumnNames());
 
     // mode ALL - use all columns
-    // mode NON_METRIC - use all dimensions and time columns 
+    // mode NON_METRIC - use all dimensions and time columns
     // mode TIME - use only time columns
     switch (_columnMinMaxValueGeneratorMode) {
       case TIME:
@@ -89,7 +89,7 @@ public class ColumnMinMaxValueGenerator {
     }
 
     PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, ColumnIndexType.DICTIONARY);
-    DataType dataType = columnMetadata.getDataType();
+    DataType dataType = columnMetadata.getDataType().getStoredType();
     int length = columnMetadata.getCardinality();
     switch (dataType) {
       case INT:
@@ -146,8 +146,8 @@ public class ColumnMinMaxValueGenerator {
   private void saveMetadata()
       throws Exception {
     if (_minMaxValueAdded) {
-      // Commons Configuration 1.10 does not support file path containing '%'. 
-      // Explicitly providing the output stream for the file bypasses the problem. 
+      // Commons Configuration 1.10 does not support file path containing '%'.
+      // Explicitly providing the output stream for the file bypasses the problem.
       try (FileOutputStream fileOutputStream = new FileOutputStream(_segmentProperties.getFile())) {
         _segmentProperties.save(fileOutputStream);
       }
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
index 0b5d7ee..eb8b031 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
@@ -43,6 +43,7 @@ public class RecordTransformerTest {
       .addSingleValueDimension("svFloat", DataType.FLOAT).addSingleValueDimension("svDouble", DataType.DOUBLE)
       .addSingleValueDimension("svBoolean", DataType.BOOLEAN).addSingleValueDimension("svTimestamp", DataType.TIMESTAMP)
       .addSingleValueDimension("svBytes", DataType.BYTES).addMultiValueDimension("mvInt", DataType.INT)
+      .addSingleValueDimension("svJson", DataType.JSON)
       .addMultiValueDimension("mvLong", DataType.LONG).addMultiValueDimension("mvFloat", DataType.FLOAT)
       .addMultiValueDimension("mvDouble", DataType.DOUBLE)
       // For sanitation
@@ -70,6 +71,7 @@ public class RecordTransformerTest {
     record.putValue("svBoolean", "true");
     record.putValue("svTimestamp", "2020-02-02 22:22:22.222");
     record.putValue("svBytes", "7b7b"/*new byte[]{123, 123}*/);
+    record.putValue("svJson", "{\"first\": \"daffy\", \"last\": \"duck\"}");
     record.putValue("mvInt", new Object[]{123L});
     record.putValue("mvLong", Collections.singletonList(123f));
     record.putValue("mvFloat", new Double[]{123d});
@@ -141,6 +143,7 @@ public class RecordTransformerTest {
       assertEquals(record.getValue("svBoolean"), 1);
       assertEquals(record.getValue("svTimestamp"), Timestamp.valueOf("2020-02-02 22:22:22.222").getTime());
       assertEquals(record.getValue("svBytes"), new byte[]{123, 123});
+      assertEquals(record.getValue("svJson"), "{\"first\":\"daffy\",\"last\":\"duck\"}");
       assertEquals(record.getValue("mvInt"), new Object[]{123});
       assertEquals(record.getValue("mvLong"), new Object[]{123L});
       assertEquals(record.getValue("mvFloat"), new Object[]{123f});
@@ -190,6 +193,7 @@ public class RecordTransformerTest {
     assertEquals(record.getValue("svBoolean"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN);
     assertEquals(record.getValue("svTimestamp"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP);
     assertEquals(record.getValue("svBytes"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES);
+    assertEquals(record.getValue("svJson"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON);
     assertEquals(record.getValue("mvInt"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT});
     assertEquals(record.getValue("mvLong"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG});
     assertEquals(record.getValue("mvFloat"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT});
@@ -210,6 +214,7 @@ public class RecordTransformerTest {
     assertTrue(record.isNullValue("svBoolean"));
     assertTrue(record.isNullValue("svTimestamp"));
     assertTrue(record.isNullValue("svBytes"));
+    assertTrue(record.isNullValue("svJson"));
     assertTrue(record.isNullValue("mvInt"));
     assertTrue(record.isNullValue("mvLong"));
     assertTrue(record.isNullValue("mvDouble"));
@@ -234,6 +239,7 @@ public class RecordTransformerTest {
       assertEquals(record.getValue("svDouble"), 123d);
       assertEquals(record.getValue("svBoolean"), 1);
       assertEquals(record.getValue("svTimestamp"), Timestamp.valueOf("2020-02-02 22:22:22.222").getTime());
+      assertEquals(record.getValue("svJson"),"{\"first\":\"daffy\",\"last\":\"duck\"}");
       assertEquals(record.getValue("svBytes"), new byte[]{123, 123});
       assertEquals(record.getValue("mvInt"), new Object[]{123});
       assertEquals(record.getValue("mvLong"), new Object[]{123L});
@@ -259,6 +265,7 @@ public class RecordTransformerTest {
       assertEquals(record.getValue("svBoolean"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN);
       assertEquals(record.getValue("svTimestamp"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP);
       assertEquals(record.getValue("svBytes"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES);
+      assertEquals(record.getValue("svJson"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON);
       assertEquals(record.getValue("mvInt"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT});
       assertEquals(record.getValue("mvLong"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG});
       assertEquals(record.getValue("mvFloat"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT});
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
index 46196ff..f77facb 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
@@ -52,6 +52,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
   public static final Integer DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN = 0;
   public static final Long DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP = 0L;
   public static final String DEFAULT_DIMENSION_NULL_VALUE_OF_STRING = "null";
+  public static final String DEFAULT_DIMENSION_NULL_VALUE_OF_JSON = "null";
   public static final byte[] DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES = new byte[0];
 
   public static final Integer DEFAULT_METRIC_NULL_VALUE_OF_INT = 0;
@@ -229,6 +230,8 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
               return DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP;
             case STRING:
               return DEFAULT_DIMENSION_NULL_VALUE_OF_STRING;
+            case JSON:
+              return DEFAULT_DIMENSION_NULL_VALUE_OF_JSON;
             case BYTES:
               return DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES;
             default:
@@ -303,6 +306,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
           jsonNode.put(key, new Timestamp((Long) _defaultNullValue).toString());
           break;
         case STRING:
+        case JSON:
           jsonNode.put(key, (String) _defaultNullValue);
           break;
         case BYTES:
@@ -375,6 +379,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
     BOOLEAN /* Stored as INT */,
     TIMESTAMP /* Stored as LONG */,
     STRING,
+    JSON /* Stored as STRING */,
     BYTES,
     STRUCT,
     MAP,
@@ -392,6 +397,8 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
           return INT;
         case TIMESTAMP:
           return LONG;
+        case JSON:
+          return STRING;
         default:
           return this;
       }
@@ -451,6 +458,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
           case TIMESTAMP:
             return TimestampUtils.toMillisSinceEpoch(value);
           case STRING:
+          case JSON:
             return value;
           case BYTES:
             return BytesUtils.toBytes(value);
@@ -481,6 +489,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
           case TIMESTAMP:
             return TimestampUtils.toMillisSinceEpoch(value);
           case STRING:
+          case JSON:
             return value;
           case BYTES:
             return BytesUtils.toByteArray(value);
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
index ebbce81..40b55d1 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
@@ -443,6 +443,7 @@ public final class Schema implements Serializable {
             case BOOLEAN:
             case TIMESTAMP:
             case STRING:
+            case JSON:
             case BYTES:
               break;
             default:
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
index 4077a53..a3e16a0 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
@@ -193,6 +193,7 @@ public class JsonUtils {
         return jsonValue.asBoolean();
       case TIMESTAMP:
       case STRING:
+      case JSON:
         return jsonValue.asText();
       case BYTES:
         try {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org