You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/05/18 00:15:26 UTC

[GitHub] [incubator-pinot] Jackie-Jiang commented on a change in pull request #6918: Convert collections not unnested to json

Jackie-Jiang commented on a change in pull request #6918:
URL: https://github.com/apache/incubator-pinot/pull/6918#discussion_r633942532



##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
##########
@@ -78,17 +81,27 @@
  */
 public class ComplexTypeTransformer implements RecordTransformer {
   private static final String DEFAULT_DELIMITER = ".";
+  public static final ComplexTypeConfig.CollectionToJsonMode DEFAULT_COLLECTION_TO_JSON_MODE =
+      ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE;
   private final List<String> _unnestFields;
   private final String _delimiter;
+  private final ComplexTypeConfig.CollectionToJsonMode _collectionToJsonMode;
 
   public ComplexTypeTransformer(TableConfig tableConfig) {
-    this(parseUnnestFields(tableConfig), parseDelimiter(tableConfig));
+    this(parseUnnestFields(tableConfig), parseDelimiter(tableConfig), parseCollectionToJsonMode(tableConfig));

Review comment:
       Suggest directly parsing table config here instead of reusing the other constructor. The parse functions seem very verbose to me

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
##########
@@ -212,25 +234,65 @@ protected void flattenMap(GenericRow record, List<String> columns) {
           }
         }
         flattenMap(record, mapColumns);
-      } else if (value instanceof Collection && _unnestFields.contains(column)) {
-        for (Object inner : (Collection) value) {
-          if (inner instanceof Map) {
-            Map<String, Object> innerMap = (Map<String, Object>) inner;
-            flattenMap(column, innerMap, new ArrayList<>(innerMap.keySet()));
+      } else if (value instanceof Collection) {
+        Collection collection = (Collection) value;
+        if (_unnestFields.contains(column)) {
+          for (Object inner : collection) {
+            if (inner instanceof Map) {
+              Map<String, Object> innerMap = (Map<String, Object>) inner;
+              flattenMap(column, innerMap, new ArrayList<>(innerMap.keySet()));
+            }
+          }
+        } else if (shallConvertToJson(collection)) {
+          try {
+            // convert the collection to JSON string
+            String jsonString = JsonFunctions.jsonFormat(collection);
+            record.putValue(column, jsonString);
+          } catch (JsonProcessingException e) {
+            throw new RuntimeException(
+                String.format("Caught exception while converting value to JSON string %s", value), e);
           }
         }
-      } else if (isArray(value) && _unnestFields.contains(column)) {
-        for (Object inner : (Object[]) value) {
-          if (inner instanceof Map) {
-            Map<String, Object> innerMap = (Map<String, Object>) inner;
-            flattenMap(column, innerMap, new ArrayList<>(innerMap.keySet()));
+      } else if (isArray(value)) {
+        Object[] array = (Object[]) value;
+        if (_unnestFields.contains(column)) {
+          for (Object inner : array) {
+            if (inner instanceof Map) {
+              Map<String, Object> innerMap = (Map<String, Object>) inner;
+              flattenMap(column, innerMap, new ArrayList<>(innerMap.keySet()));
+            }
+          }
+        } else if (shallConvertToJson(array)) {
+          try {
+            // convert the array to JSON string
+            String jsonString = JsonFunctions.jsonFormat(array);
+            record.putValue(column, jsonString);
+          } catch (JsonProcessingException e) {
+            throw new RuntimeException(
+                String.format("Caught exception while converting value to JSON string %s", value), e);
           }
         }
       }
     }
   }
 
-  static private boolean isArray(Object obj) {
+  private boolean containPrimitives(Object[] value) {

Review comment:
       We only check the first value. Please add some javadoc describing the behavior

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
##########
@@ -212,25 +234,65 @@ protected void flattenMap(GenericRow record, List<String> columns) {
           }
         }
         flattenMap(record, mapColumns);
-      } else if (value instanceof Collection && _unnestFields.contains(column)) {
-        for (Object inner : (Collection) value) {
-          if (inner instanceof Map) {
-            Map<String, Object> innerMap = (Map<String, Object>) inner;
-            flattenMap(column, innerMap, new ArrayList<>(innerMap.keySet()));
+      } else if (value instanceof Collection) {
+        Collection collection = (Collection) value;
+        if (_unnestFields.contains(column)) {
+          for (Object inner : collection) {
+            if (inner instanceof Map) {
+              Map<String, Object> innerMap = (Map<String, Object>) inner;
+              flattenMap(column, innerMap, new ArrayList<>(innerMap.keySet()));
+            }
+          }
+        } else if (shallConvertToJson(collection)) {
+          try {
+            // convert the collection to JSON string
+            String jsonString = JsonFunctions.jsonFormat(collection);
+            record.putValue(column, jsonString);
+          } catch (JsonProcessingException e) {
+            throw new RuntimeException(
+                String.format("Caught exception while converting value to JSON string %s", value), e);
           }
         }
-      } else if (isArray(value) && _unnestFields.contains(column)) {
-        for (Object inner : (Object[]) value) {
-          if (inner instanceof Map) {
-            Map<String, Object> innerMap = (Map<String, Object>) inner;
-            flattenMap(column, innerMap, new ArrayList<>(innerMap.keySet()));
+      } else if (isArray(value)) {
+        Object[] array = (Object[]) value;
+        if (_unnestFields.contains(column)) {
+          for (Object inner : array) {
+            if (inner instanceof Map) {
+              Map<String, Object> innerMap = (Map<String, Object>) inner;
+              flattenMap(column, innerMap, new ArrayList<>(innerMap.keySet()));
+            }
+          }
+        } else if (shallConvertToJson(array)) {
+          try {
+            // convert the array to JSON string
+            String jsonString = JsonFunctions.jsonFormat(array);
+            record.putValue(column, jsonString);
+          } catch (JsonProcessingException e) {
+            throw new RuntimeException(
+                String.format("Caught exception while converting value to JSON string %s", value), e);
           }
         }
       }
     }
   }
 
-  static private boolean isArray(Object obj) {
+  private boolean containPrimitives(Object[] value) {
+    if (value.length == 0) {
+      return true;
+    }
+    Object element = value[0];
+    return !(element instanceof Map || element instanceof Collection || isArray(element));
+  }
+
+  private boolean containPrimitives(Collection value) {
+    if (value.isEmpty()) {
+      return true;
+    }
+    Object element = value.iterator().next();
+    return !(element instanceof Map || element instanceof Collection || isArray(element));
+  }
+
+  static boolean isArray(Object obj) {

Review comment:
       Is this visible to test? Add `private` or annotation




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org