You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by GitBox <gi...@apache.org> on 2018/11/27 22:14:14 UTC

[GitHub] gianm closed pull request #6654: faster flattening for non-existent paths

gianm closed pull request #6654: faster flattening for non-existent paths
URL: https://github.com/apache/incubator-druid/pull/6654
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java b/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java
new file mode 100644
index 00000000000..a98999a1b72
--- /dev/null
+++ b/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.data.input.impl;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
+
+import java.util.Collection;
+import java.util.Collections;
+
+//
+//
+
+/**
+ * Custom json-path JsonProvider override to circumvent slow performance when encountering null paths as described in
+ * https://github.com/json-path/JsonPath/issues/396
+ *
+ * Note that this only avoids errors for map properties, avoiding the exception on array paths is not possible without
+ * patching json-path itself
+ */
+public class FastJacksonJsonNodeJsonProvider extends JacksonJsonNodeJsonProvider
+{
+  @Override
+  public boolean isMap(Object obj)
+  {
+    return obj == null || super.isMap(obj);
+  }
+
+  @Override
+  public Object getMapValue(Object obj, String key)
+  {
+    if (obj == null) {
+      return null;
+    } else {
+      ObjectNode jsonObject = (ObjectNode) obj;
+      Object o = jsonObject.get(key);
+      if (!jsonObject.has(key)) {
+        return null;
+      } else {
+        return unwrap(o);
+      }
+    }
+  }
+
+  @Override
+  public Collection<String> getPropertyKeys(final Object o)
+  {
+    if (o == null) {
+      return Collections.emptySet();
+    }
+    return super.getPropertyKeys(o);
+  }
+}
diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
index d16280a8831..8d58451f34b 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
@@ -24,10 +24,10 @@
 import com.jayway.jsonpath.Configuration;
 import com.jayway.jsonpath.JsonPath;
 import com.jayway.jsonpath.Option;
-import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
 import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
 import net.thisptr.jackson.jq.JsonQuery;
 import net.thisptr.jackson.jq.exception.JsonQueryException;
+import org.apache.druid.data.input.impl.FastJacksonJsonNodeJsonProvider;
 import org.apache.druid.java.util.common.StringUtils;
 
 import javax.annotation.Nullable;
@@ -45,7 +45,7 @@
 {
   private static final Configuration JSONPATH_CONFIGURATION =
       Configuration.builder()
-                   .jsonProvider(new JacksonJsonNodeJsonProvider())
+                   .jsonProvider(new FastJacksonJsonNodeJsonProvider())
                    .mappingProvider(new JacksonMappingProvider())
                    .options(EnumSet.of(Option.SUPPRESS_EXCEPTIONS))
                    .build();
@@ -119,7 +119,9 @@ private Object valueConversionFunction(JsonNode val)
     if (val.isArray()) {
       List<Object> newList = new ArrayList<>();
       for (JsonNode entry : val) {
-        newList.add(valueConversionFunction(entry));
+        if (!entry.isNull()) {
+          newList.add(valueConversionFunction(entry));
+        }
       }
       return newList;
     }
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
index 7168076b265..4dd13b601a6 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
@@ -91,6 +91,7 @@ public void testParseRowWithConditional()
             true,
             ImmutableList.of(
                 new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.[?(@.maybe_object)].maybe_object.foo.test"),
+                new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.maybe_object_2.foo.test"),
                 new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo")
             )
         ),
@@ -99,6 +100,7 @@ public void testParseRowWithConditional()
 
     final Map<String, Object> expected = new HashMap<>();
     expected.put("foo", new ArrayList());
+    expected.put("baz", null);
     expected.put("bar", Collections.singletonList("test"));
 
     final Parser<String, Object> parser = parseSpec.makeParser();
diff --git a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
index facdaa043a7..0b55d762f00 100644
--- a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
+++ b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
@@ -34,6 +34,7 @@
 import java.nio.ByteBuffer;
 import java.util.EnumSet;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
 import java.util.function.Function;
 import java.util.stream.Collectors;
@@ -140,6 +141,9 @@ private Object transformValue(final Object field)
     if (field instanceof Utf8) {
       return field.toString();
     }
+    if (field instanceof List) {
+      return ((List) field).stream().filter(Objects::nonNull).collect(Collectors.toList());
+    }
     return field;
   }
 }
diff --git a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java
index eb41632fee8..4beb7578dcd 100644
--- a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java
+++ b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java
@@ -27,6 +27,7 @@
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -98,7 +99,9 @@ public int length(final Object o)
   @Override
   public Collection<String> getPropertyKeys(final Object o)
   {
-    if (o instanceof Map) {
+    if (o == null) {
+      return Collections.emptySet();
+    } else if (o instanceof Map) {
       return ((Map<Object, Object>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
     } else if (o instanceof GenericRecord) {
       return ((GenericRecord) o).getSchema().getFields().stream().map(Schema.Field::name).collect(Collectors.toSet());
@@ -138,7 +141,9 @@ public void setArrayIndex(final Object o, final int i, final Object o1)
   @Override
   public Object getMapValue(final Object o, final String s)
   {
-    if (o instanceof GenericRecord) {
+    if (o == null) {
+      return null;
+    } else if (o instanceof GenericRecord) {
       return ((GenericRecord) o).get(s);
     } else if (o instanceof Map) {
       return ((Map) o).get(s);
@@ -172,7 +177,7 @@ public void removeProperty(final Object o, final Object o1)
   @Override
   public boolean isMap(final Object o)
   {
-    return o instanceof Map || o instanceof GenericRecord;
+    return o == null || o instanceof Map || o instanceof GenericRecord;
   }
 
   @Override
diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java
index cd2612ebd01..2ba939b162a 100644
--- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java
+++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java
@@ -30,6 +30,7 @@
 import javax.annotation.Nullable;
 import java.util.EnumSet;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
 import java.util.function.Function;
 import java.util.stream.Collectors;
@@ -100,7 +101,7 @@ private Object finalizeConversion(Object o)
     if (ParquetGroupConverter.isWrappedListPrimitive(o)) {
       return converter.unwrapListPrimitive(o);
     } else if (o instanceof List) {
-      List<Object> asList = (List<Object>) o;
+      List<Object> asList = ((List<Object>) o).stream().filter(Objects::nonNull).collect(Collectors.toList());
       if (asList.stream().allMatch(ParquetGroupConverter::isWrappedListPrimitive)) {
         return asList.stream().map(Group.class::cast).map(converter::unwrapListPrimitive).collect(Collectors.toList());
       }
diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java
index 78e3f3355f7..b422e97d7ed 100644
--- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java
+++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java
@@ -26,6 +26,7 @@
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -64,7 +65,7 @@ public boolean isArray(final Object o)
   @Override
   public boolean isMap(final Object o)
   {
-    return o instanceof Map || o instanceof Group;
+    return o == null || o instanceof Map || o instanceof Group;
   }
 
   @Override
@@ -93,7 +94,9 @@ public int length(final Object o)
   @Override
   public Collection<String> getPropertyKeys(final Object o)
   {
-    if (o instanceof Map) {
+    if (o == null) {
+      return Collections.emptySet();
+    } else if (o instanceof Map) {
       return ((Map<Object, Object>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
     } else if (o instanceof Group) {
       return ((Group) o).getType().getFields().stream().map(f -> f.getName()).collect(Collectors.toSet());
@@ -105,7 +108,9 @@ public int length(final Object o)
   @Override
   public Object getMapValue(final Object o, final String s)
   {
-    if (o instanceof Map) {
+    if (o == null) {
+      return null;
+    } else if (o instanceof Map) {
       return ((Map) o).get(s);
     } else if (o instanceof Group) {
       Group g = (Group) o;


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org