You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2019/03/19 05:41:49 UTC

[incubator-pinot] 03/06: Adding support for Object Type

This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch nested-object-indexing-1
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 74f8e6eede16c3a8eba97d449feeeb70d486b76b
Author: kishore gopalakrishna <g....@gmail.com>
AuthorDate: Sun Jan 20 17:49:05 2019 -0800

    Adding support for Object Type
---
 .../org/apache/pinot/common/data/FieldSpec.java    | 23 +++++-
 .../org/apache/pinot/common/data/PinotObject.java  | 58 +++++++++++++++
 .../pinot/common/data/objects/JSONObject.java      | 83 ++++++++++++++++++++++
 .../pinot/common/data/objects/MapObject.java       | 66 +++++++++++++++++
 .../pinot/common/data/objects/TextObject.java      | 53 ++++++++++++++
 .../creator/impl/SegmentColumnarIndexCreator.java  |  1 +
 .../core/segment/creator/impl/V1Constants.java     |  1 +
 .../pinot/core/segment/index/ColumnMetadata.java   | 21 ++++--
 8 files changed, 301 insertions(+), 5 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/data/FieldSpec.java b/pinot-common/src/main/java/org/apache/pinot/common/data/FieldSpec.java
index 30be748..080f0e7 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/data/FieldSpec.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/data/FieldSpec.java
@@ -86,6 +86,10 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, ConfigNodeLife
 
   @ConfigKey("virtualColumnProvider")
   protected String _virtualColumnProvider;
+  
+  // Complex object type that can be constructed from the stored raw bytes, e.g. map, json, text
+  @ConfigKey("objectType")
+  protected String _objectType;
 
   // Default constructor required by JSON de-serializer. DO NOT REMOVE.
   public FieldSpec() {
@@ -98,15 +102,21 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, ConfigNodeLife
   public FieldSpec(String name, DataType dataType, boolean isSingleValueField, @Nullable Object defaultNullValue) {
     this(name, dataType, isSingleValueField, DEFAULT_MAX_LENGTH, defaultNullValue);
   }
-
+  
+  /**
+   * Creates a field spec without an object type: delegates to the six-argument constructor,
+   * passing {@code null} as the object type.
+   */
   public FieldSpec(String name, DataType dataType, boolean isSingleValueField, int maxLength,
       @Nullable Object defaultNullValue) {
+    this(name, dataType, isSingleValueField, maxLength, defaultNullValue, null);
+  }
+  
+  /**
+   * Creates a field spec, optionally tagged with an object type.
+   *
+   * @param name column name
+   * @param dataType declared data type; the value kept is {@code dataType.getStoredType()}
+   * @param isSingleValueField whether the field is single-valued
+   * @param maxLength value kept as the max length of the field
+   * @param defaultNullValue default null value, may be {@code null}
+   * @param objectType complex object type of the stored bytes (e.g. map, json, text), may be
+   *          {@code null}
+   */
+  public FieldSpec(String name, DataType dataType, boolean isSingleValueField, int maxLength,
+      @Nullable Object defaultNullValue, @Nullable String objectType) {
     _name = name;
     _dataType = dataType.getStoredType();
     _isSingleValueField = isSingleValueField;
     _maxLength = maxLength;
+    // Store the object type so that getObjectType() and hashCode() reflect the constructor argument.
+    _objectType = objectType;
     setDefaultNullValue(defaultNullValue);
   }
+  
 
   public abstract FieldType getFieldType();
 
@@ -183,6 +193,16 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, ConfigNodeLife
       _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue);
     }
   }
+  
+  
+  
+  /**
+   * @return the object type configured for this field, or {@code null} if none has been set
+   */
+  public String getObjectType() {
+    return _objectType;
+  }
+
+  /**
+   * @param objectType the object type to record for this field
+   */
+  public void setObjectType(String objectType) {
+    _objectType = objectType;
+  }
 
   private static Object getDefaultNullValue(FieldType fieldType, DataType dataType,
       @Nullable String stringDefaultNullValue) {
@@ -353,6 +373,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, ConfigNodeLife
     result = EqualityUtils.hashCodeOf(result, _isSingleValueField);
     result = EqualityUtils.hashCodeOf(result, getStringValue(_defaultNullValue));
     result = EqualityUtils.hashCodeOf(result, _maxLength);
+    result = EqualityUtils.hashCodeOf(result, _objectType);
     return result;
   }
 
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/data/PinotObject.java b/pinot-common/src/main/java/org/apache/pinot/common/data/PinotObject.java
new file mode 100644
index 0000000..3f1ca33
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/common/data/PinotObject.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.data;
+
+import java.util.List;
+
+/**
+ * Common interface for complex Object types such as HyperLogLog, Map, JSON etc.
+ * Flow to convert byte[] to PinotObject:
+ * - compute the objectTypeClass from objectType (from schema/fieldSpec.objectType)
+ * - instantiate the PinotObject instance
+ * - call init(bytes)
+ * - implementations are expected to implement all other methods
+ */
+public interface PinotObject {
+
+  /**
+   * Initializes the PinotObject from byte[]. Note that this method can be repeatedly called on the
+   * same instance of PinotObject.
+   * @param bytes serialized form of the object
+   */
+  void init(byte[] bytes);
+
+  /**
+   * @return serialized byte form
+   */
+  byte[] toBytes();
+
+  /**
+   * @return list of properties in this object. Note, this can return nested properties using dot
+   *         notation
+   */
+  List<String> getPropertyNames();
+
+  /**
+   * @param propertyName name of the property to look up
+   * @return the value of the property, it can be a single object or a list of objects.
+   */
+  Object getProperty(String propertyName);
+
+}
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/data/objects/JSONObject.java b/pinot-common/src/main/java/org/apache/pinot/common/data/objects/JSONObject.java
new file mode 100644
index 0000000..ae7f82d
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/common/data/objects/JSONObject.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.data.objects;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.pinot.common.data.PinotObject;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.google.common.collect.Lists;
+
+/**
+ * {@link PinotObject} backed by a Jackson {@link JsonNode} tree. Only the top-level fields of the
+ * tree are exposed as properties.
+ */
+public class JSONObject implements PinotObject {
+  // One mapper shared by all instances; final so the shared reference cannot be swapped out.
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+  private JsonNode _jsonNode;
+
+  /**
+   * Parses the given bytes as JSON. This is deliberately best effort: content that cannot be
+   * parsed, or for which the parser yields no tree at all, is treated as an empty JSON object.
+   *
+   * @param bytes serialized JSON
+   */
+  @Override
+  public void init(byte[] bytes) {
+    JsonNode parsed;
+    try {
+      parsed = MAPPER.readTree(bytes);
+    } catch (IOException e) {
+      parsed = null;
+    }
+    // Guard against a null tree so the other methods never dereference null after init.
+    _jsonNode = (parsed != null) ? parsed : JsonNodeFactory.instance.objectNode();
+  }
+
+  /**
+   * @return the JSON tree serialized to bytes, or the bytes of "{}" if serialization fails
+   */
+  @Override
+  public byte[] toBytes() {
+    try {
+      return MAPPER.writeValueAsBytes(_jsonNode);
+    } catch (JsonProcessingException e) {
+      // Explicit charset: the fallback must not depend on the platform default encoding.
+      return "{}".getBytes(java.nio.charset.StandardCharsets.UTF_8);
+    }
+  }
+
+  /**
+   * @return a new list holding the top-level field names of the JSON tree
+   */
+  @Override
+  public List<String> getPropertyNames() {
+    // TODO: Add support to iterate recursively
+    return Lists.newArrayList(_jsonNode.fieldNames());
+  }
+
+  /**
+   * @param fieldName top-level field to look up
+   * @return {@code null} if the field is absent, a list with the text form of every element if
+   *         the field is an array, otherwise the text form of the field value
+   */
+  @Override
+  public Object getProperty(String fieldName) {
+    JsonNode jsonNode = _jsonNode.get(fieldName);
+    if (jsonNode == null) {
+      // JsonNode.get returns null for a missing field; mirror Map.get instead of throwing an NPE.
+      return null;
+    }
+    if (jsonNode.isArray()) {
+      List<String> list = new ArrayList<String>();
+      Iterator<JsonNode> iterator = jsonNode.iterator();
+      while (iterator.hasNext()) {
+        list.add(iterator.next().asText());
+      }
+      return list;
+    }
+    return jsonNode.asText();
+  }
+
+}
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/data/objects/MapObject.java b/pinot-common/src/main/java/org/apache/pinot/common/data/objects/MapObject.java
new file mode 100644
index 0000000..606137d
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/common/data/objects/MapObject.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.data.objects;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.pinot.common.data.PinotObject;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Lists;
+
+/**
+ * {@link PinotObject} backed by a map that is read from and written to JSON bytes via Jackson.
+ */
+public class MapObject implements PinotObject {
+
+  // One mapper shared by all instances; final so the shared reference cannot be swapped out.
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+  Map<String, Object> _stringMap;
+
+  /**
+   * Reads the given bytes into a map. This is deliberately best effort: content that cannot be
+   * read, or that deserializes to no map at all (e.g. the JSON literal null), results in an
+   * empty map.
+   *
+   * @param bytes serialized map
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void init(byte[] bytes) {
+    Map<String, Object> parsed;
+    try {
+      parsed = MAPPER.readValue(bytes, Map.class);
+    } catch (IOException e) {
+      parsed = null;
+    }
+    // Never leave the map null, otherwise getPropertyNames()/getProperty() would throw.
+    _stringMap = (parsed != null) ? parsed : Collections.<String, Object>emptyMap();
+  }
+
+  /**
+   * @return the map serialized to bytes, or the bytes of "{}" if serialization fails
+   */
+  @Override
+  public byte[] toBytes() {
+    try {
+      return MAPPER.writeValueAsBytes(_stringMap);
+    } catch (JsonProcessingException e) {
+      // Explicit charset: the fallback must not depend on the platform default encoding.
+      return "{}".getBytes(java.nio.charset.StandardCharsets.UTF_8);
+    }
+  }
+
+  /**
+   * @return a new list holding the keys of the map
+   */
+  @Override
+  public List<String> getPropertyNames() {
+    return Lists.newArrayList(_stringMap.keySet());
+  }
+
+  /**
+   * @param field key to look up
+   * @return the value stored under the key, or {@code null} if the map has no such key
+   */
+  @Override
+  public Object getProperty(String field) {
+    return _stringMap.get(field);
+  }
+
+}
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/data/objects/TextObject.java b/pinot-common/src/main/java/org/apache/pinot/common/data/objects/TextObject.java
new file mode 100644
index 0000000..cf5f27e
--- /dev/null
+++ b/pinot-common/src/main/java/org/apache/pinot/common/data/objects/TextObject.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.data.objects;
+
+import java.util.List;
+
+import org.apache.pinot.common.data.PinotObject;
+
+import com.google.common.collect.Lists;
+
+/**
+ * {@link PinotObject} that wraps raw text bytes and exposes them through a single property
+ * named "Content".
+ */
+public class TextObject implements PinotObject {
+
+  private static final String CONTENT_PROPERTY = "Content";
+  byte[] _bytes;
+
+  /**
+   * Keeps a reference to the given bytes (no copy is made).
+   *
+   * @param bytes raw text bytes
+   */
+  @Override
+  public void init(byte[] bytes) {
+    _bytes = bytes;
+  }
+
+  /**
+   * @return the bytes passed to the last {@link #init(byte[])} call
+   */
+  @Override
+  public byte[] toBytes() {
+    return _bytes;
+  }
+
+  /**
+   * @return a new single-element list containing "Content"
+   */
+  @Override
+  public List<String> getPropertyNames() {
+    // A fresh list per call, so callers cannot modify state shared between instances.
+    return Lists.newArrayList(CONTENT_PROPERTY);
+  }
+
+  /**
+   * @param field property name; only "Content" is known
+   * @return the wrapped bytes decoded as text for "Content"; {@code null} for any other name or
+   *         when no bytes have been set
+   */
+  @Override
+  public Object getProperty(String field) {
+    if (_bytes == null || !CONTENT_PROPERTY.equals(field)) {
+      return null;
+    }
+    // NOTE(review): assumes the text is UTF-8 encoded -- confirm against the producers of this data.
+    return new String(_bytes, java.nio.charset.StandardCharsets.UTF_8);
+  }
+
+}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/SegmentColumnarIndexCreator.java
index 14ba041..6e7642d 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/SegmentColumnarIndexCreator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -404,6 +404,7 @@ public class SegmentColumnarIndexCreator implements SegmentCreator {
         String.valueOf(PinotDataBitSet.getNumBitsPerValue(cardinality - 1)));
     properties.setProperty(getKeyFor(column, DICTIONARY_ELEMENT_SIZE), String.valueOf(dictionaryElementSize));
     properties.setProperty(getKeyFor(column, COLUMN_TYPE), String.valueOf(fieldSpec.getFieldType()));
+    properties.setProperty(getKeyFor(column, OBJECT_TYPE), fieldSpec.getObjectType());
     properties.setProperty(getKeyFor(column, IS_SORTED), String.valueOf(columnIndexCreationInfo.isSorted()));
     properties.setProperty(getKeyFor(column, HAS_NULL_VALUE), String.valueOf(columnIndexCreationInfo.hasNulls()));
     properties.setProperty(getKeyFor(column, HAS_DICTIONARY), String.valueOf(hasDictionary));
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/V1Constants.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/V1Constants.java
index 4d6dbdb..3536018 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/V1Constants.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/V1Constants.java
@@ -119,6 +119,7 @@ public class V1Constants {
       public static final String BITS_PER_ELEMENT = "bitsPerElement";
       public static final String DICTIONARY_ELEMENT_SIZE = "lengthOfEachEntry";
       public static final String COLUMN_TYPE = "columnType";
+      public static final String OBJECT_TYPE = "objectType";
       public static final String IS_SORTED = "isSorted";
       public static final String HAS_NULL_VALUE = "hasNullValue";
       public static final String HAS_DICTIONARY = "hasDictionary";
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/ColumnMetadata.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/ColumnMetadata.java
index 1a37ffa..0ddca57 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/ColumnMetadata.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/ColumnMetadata.java
@@ -58,6 +58,7 @@ public class ColumnMetadata {
   private final int bitsPerElement;
   private final int columnMaxLength;
   private final FieldType fieldType;
+  private final String objectType;
   private final boolean isSorted;
   @JsonProperty
   private final boolean containsNulls;
@@ -98,6 +99,7 @@ public class ColumnMetadata {
     builder.setBitsPerElement(config.getInt(getKeyFor(column, BITS_PER_ELEMENT)));
     builder.setColumnMaxLength(config.getInt(getKeyFor(column, DICTIONARY_ELEMENT_SIZE)));
     builder.setFieldType(FieldType.valueOf(config.getString(getKeyFor(column, COLUMN_TYPE)).toUpperCase()));
+    builder.setObjectType(config.getString(getKeyFor(column, OBJECT_TYPE), null));
     builder.setIsSorted(config.getBoolean(getKeyFor(column, IS_SORTED)));
     builder.setContainsNulls(config.getBoolean(getKeyFor(column, HAS_NULL_VALUE)));
     builder.setHasDictionary(config.getBoolean(getKeyFor(column, HAS_DICTIONARY), true));
@@ -218,6 +220,7 @@ public class ColumnMetadata {
     private int bitsPerElement;
     private int columnMaxLength;
     private FieldType fieldType;
+    private String objectType;
     private boolean isSorted;
     private boolean containsNulls;
     private boolean hasDictionary;
@@ -286,6 +289,11 @@ public class ColumnMetadata {
       return this;
     }
 
+    /**
+     * Records the object type to hand to the ColumnMetadata created by {@code build()}.
+     *
+     * @param objectType object type of the column
+     * @return this builder
+     */
+    public Builder setObjectType(String objectType) {
+      this.objectType = objectType;
+      return this;
+    }
+
     public Builder setIsSorted(boolean isSorted) {
       this.isSorted = isSorted;
       return this;
@@ -397,16 +405,16 @@ public class ColumnMetadata {
 
+    /**
+     * Creates the ColumnMetadata from the values held by this builder. The object type is passed
+     * right after the field type; the argument order must match the private constructor.
+     */
     public ColumnMetadata build() {
       return new ColumnMetadata(columnName, cardinality, totalDocs, totalRawDocs, totalAggDocs, dataType,
-          bitsPerElement, columnMaxLength, fieldType, isSorted, containsNulls, hasDictionary, hasInvertedIndex,
-          isSingleValue, maxNumberOfMultiValues, totalNumberOfEntries, isAutoGenerated, isVirtual,
+          bitsPerElement, columnMaxLength, fieldType, objectType, isSorted, containsNulls, hasDictionary,
+          hasInvertedIndex, isSingleValue, maxNumberOfMultiValues, totalNumberOfEntries, isAutoGenerated, isVirtual,
           defaultNullValueString, timeUnit, paddingCharacter, derivedMetricType, fieldSize, originColumnName, minValue,
           maxValue, partitionFunction, numPartitions, _partitions, dateTimeFormat, dateTimeGranularity);
     }
   }
 
   private ColumnMetadata(String columnName, int cardinality, int totalDocs, int totalRawDocs, int totalAggDocs,
-      DataType dataType, int bitsPerElement, int columnMaxLength, FieldType fieldType, boolean isSorted,
-      boolean hasNulls, boolean hasDictionary, boolean hasInvertedIndex, boolean isSingleValue,
+      DataType dataType, int bitsPerElement, int columnMaxLength, FieldType fieldType, String objectType,
+      boolean isSorted, boolean hasNulls, boolean hasDictionary, boolean hasInvertedIndex, boolean isSingleValue,
       int maxNumberOfMultiValues, int totalNumberOfEntries, boolean isAutoGenerated, boolean isVirtual,
       String defaultNullValueString, TimeUnit timeUnit, char paddingCharacter, DerivedMetricType derivedMetricType,
       int fieldSize, String originColumnName, Comparable minValue, Comparable maxValue,
@@ -421,6 +429,7 @@ public class ColumnMetadata {
     this.bitsPerElement = bitsPerElement;
     this.columnMaxLength = columnMaxLength;
     this.fieldType = fieldType;
+    this.objectType = objectType;
     this.isSorted = isSorted;
     this.containsNulls = hasNulls;
     this.hasDictionary = hasDictionary;
@@ -508,6 +517,10 @@ public class ColumnMetadata {
     return fieldType;
   }
 
+  /**
+   * @return the object type given to this column metadata at construction
+   */
+  public String getObjectType() {
+    return objectType;
+  }
+
   public boolean isSorted() {
     return isSorted;
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org