You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cw...@apache.org on 2023/02/01 12:15:21 UTC
[druid] branch master updated: Fixing incorrect filtering of nulls in an array when ingesting for JSON and Avro (#13712)
This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 74ff848ce5 Fixing incorrect filtering of nulls in an array when ingesting for JSON and Avro (#13712)
74ff848ce5 is described below
commit 74ff848ce54cd389bdae6153322ba8cbc32d5cb2
Author: somu-imply <93...@users.noreply.github.com>
AuthorDate: Wed Feb 1 04:15:08 2023 -0800
Fixing incorrect filtering of nulls in an array when ingesting for JSON and Avro (#13712)
---
.../util/common/parsers/JSONFlattenerMaker.java | 5 +--
.../druid/data/input/impl/JSONParseSpecTest.java | 39 +++++++++++++++++-
.../druid/data/input/impl/JsonLineReaderTest.java | 6 ++-
.../druid/data/input/avro/AvroFlattenerMaker.java | 3 +-
.../avro-extensions/src/test/avro/some-datum.avsc | 12 ++++--
.../data/input/AvroStreamInputFormatTest.java | 7 ++--
.../data/input/AvroStreamInputRowParserTest.java | 7 ++--
.../data/input/avro/AvroFlattenerMakerTest.java | 48 +++++++++++++++++++++-
8 files changed, 106 insertions(+), 21 deletions(-)
diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
index 0b8244e29b..5df9819908 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
@@ -179,12 +179,11 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker<JsonN
return ((BinaryNode) val).binaryValue();
}
+
if (val.isArray()) {
List<Object> newList = new ArrayList<>();
for (JsonNode entry : val) {
- if (!entry.isNull()) {
- newList.add(convertJsonNode(entry, enc));
- }
+ newList.add(convertJsonNode(entry, enc));
}
return newList;
}
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
index d5728f7988..b89c11258b 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
@@ -102,7 +102,7 @@ public class JSONParseSpecTest
);
final Map<String, Object> expected = new HashMap<>();
- expected.put("foo", new ArrayList());
+ expected.put("foo", Collections.singletonList(null));
expected.put("baz", null);
expected.put("bar", Collections.singletonList("test"));
@@ -113,6 +113,43 @@ public class JSONParseSpecTest
Assert.assertEquals(expected, parsedRow);
}
+ @Test
+ public void testParseRowWithNullsInArrays()
+ {
+ final JSONParseSpec parseSpec = new JSONParseSpec(
+ new TimestampSpec("timestamp", "iso", null),
+ new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))),
+ new JSONPathSpec(
+ true,
+ ImmutableList.of(
+ // https://github.com/apache/druid/issues/6653 $.x.y.z where y is missing
+ new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.baz.[?(@.maybe_object)].maybe_object"),
+ // https://github.com/apache/druid/issues/6653 $.x.y.z where y is from an array and is null
+ new JSONPathFieldSpec(JSONPathFieldType.PATH, "nullFoo", "$.nullFoo.[?(@.value)][0].foo"),
+ new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.baz"),
+ // $.x.y.z where x is from an array and is null
+ new JSONPathFieldSpec(JSONPathFieldType.PATH, "nullBaz", "$.baz[1].foo.maybe_object"),
+ new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo")
+ )
+ ),
+ null,
+ false
+ );
+
+ final Map<String, Object> expected = new HashMap<>();
+ expected.put("foo", new ArrayList<>());
+ expected.put("baz", Arrays.asList("1", null, "2", null));
+ expected.put("bar", Collections.singletonList("test"));
+ expected.put("nullFoo", new ArrayList<>());
+ expected.put("nullBaz", null);
+
+ final Parser<String, Object> parser = parseSpec.makeParser();
+ final Map<String, Object> parsedRow = parser.parseToMap("{\"baz\":[\"1\",null,\"2\",null],\"nullFoo\":{\"value\":[null,null]},\"something_else\": {\"foo\": \"test\"}}");
+
+ Assert.assertNotNull(parsedRow);
+ Assert.assertEquals(expected, parsedRow);
+ }
+
@Test
public void testSerde() throws IOException
{
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/JsonLineReaderTest.java b/core/src/test/java/org/apache/druid/data/input/impl/JsonLineReaderTest.java
index 3ea56aa71f..5b0a3a391a 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/JsonLineReaderTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/JsonLineReaderTest.java
@@ -141,7 +141,11 @@ public class JsonLineReaderTest
while (iterator.hasNext()) {
final InputRow row = iterator.next();
Assert.assertEquals("test", Iterables.getOnlyElement(row.getDimension("bar")));
- Assert.assertEquals(Collections.emptyList(), row.getDimension("foo"));
+ // Since foo is in the JSONPathSpec it comes as an array of [null]
+ // row.getRaw("foo") comes out as an array of nulls but the
+ // row.getDimension("foo") stringifies it as "null". A future developer should aim to relieve this
+ Assert.assertEquals(Collections.singletonList(null), row.getRaw("foo"));
+ Assert.assertEquals(Collections.singletonList("null"), row.getDimension("foo"));
Assert.assertTrue(row.getDimension("baz").isEmpty());
numActualIterations++;
}
diff --git a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
index ba9d895b1f..d557b65c84 100644
--- a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
+++ b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
@@ -39,7 +39,6 @@ import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
@@ -194,7 +193,7 @@ public class AvroFlattenerMaker implements ObjectFlatteners.FlattenerMaker<Gener
} else if (field instanceof Utf8) {
return field.toString();
} else if (field instanceof List) {
- return ((List<?>) field).stream().filter(Objects::nonNull).map(this::transformValue).collect(Collectors.toList());
+ return ((List<?>) field).stream().map(this::transformValue).collect(Collectors.toList());
} else if (field instanceof GenericEnumSymbol) {
return field.toString();
} else if (field instanceof GenericFixed) {
diff --git a/extensions-core/avro-extensions/src/test/avro/some-datum.avsc b/extensions-core/avro-extensions/src/test/avro/some-datum.avsc
index f03cb708df..fe43d719ee 100644
--- a/extensions-core/avro-extensions/src/test/avro/some-datum.avsc
+++ b/extensions-core/avro-extensions/src/test/avro/some-datum.avsc
@@ -33,10 +33,14 @@
},
{
"name": "someStringArray",
- "type": {
- "type": "array",
- "items": "string"
- }
+ "type": [
+ "null",
+ {
+ "type": "array",
+ "items": ["null","string"]
+ }
+ ],
+ "default":null
},
{
"name": "someIntValueMap",
diff --git a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputFormatTest.java b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputFormatTest.java
index a59c32b810..177766510d 100644
--- a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputFormatTest.java
+++ b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputFormatTest.java
@@ -81,7 +81,7 @@ import static org.apache.druid.data.input.AvroStreamInputRowParserTest.buildSome
* "someOtherId": 6568719896,
* "isValid": true,
* "someIntArray": [1, 2, 4, 8],
- * "someStringArray": ["8", "4", "2", "1"],
+ * "someStringArray": ["8", "4", "2", "1", null],
* "someIntValueMap": {"8": 8, "1": 1, "2": 2, "4": 4},
* "someStringValueMap": {"8": "8", "1": "1", "2": "2", "4": "4"},
* "someUnion": "string as union",
@@ -108,14 +108,13 @@ public class AvroStreamInputFormatTest extends InitializedNullHandlingTest
private static final List<String> DIMENSIONS_SCHEMALESS = Arrays.asList(
"nested",
SOME_OTHER_ID,
- "someStringArray",
"someIntArray",
"someFloat",
+ "someUnion",
EVENT_TYPE,
+ ID,
"someFixed",
"someBytes",
- "someUnion",
- ID,
"someEnum",
"someLong",
"someInt",
diff --git a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputRowParserTest.java b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputRowParserTest.java
index 3f206a77a0..3bcec4e3c1 100644
--- a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputRowParserTest.java
+++ b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputRowParserTest.java
@@ -88,14 +88,13 @@ public class AvroStreamInputRowParserTest
private static final List<String> DIMENSIONS_SCHEMALESS = Arrays.asList(
"nested",
SOME_OTHER_ID,
- "someStringArray",
"someIntArray",
"someFloat",
+ "someUnion",
EVENT_TYPE,
+ ID,
"someFixed",
"someBytes",
- "someUnion",
- ID,
"someEnum",
"someLong",
"someInt",
@@ -128,7 +127,7 @@ public class AvroStreamInputRowParserTest
.setSubInt(SUB_INT_VALUE)
.setSubLong(SUB_LONG_VALUE)
.build();
- private static final List<CharSequence> SOME_STRING_ARRAY_VALUE = Arrays.asList("8", "4", "2", "1");
+ private static final List<CharSequence> SOME_STRING_ARRAY_VALUE = Arrays.asList("8", "4", "2", "1", null);
private static final List<Integer> SOME_INT_ARRAY_VALUE = Arrays.asList(1, 2, 4, 8);
static final Map<CharSequence, Integer> SOME_INT_VALUE_MAP_VALUE = Maps.asMap(
new HashSet<>(Arrays.asList("8", "2", "4", "1")), new Function<CharSequence, Integer>()
diff --git a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java
index 28caea3049..59e15deeb7 100644
--- a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java
+++ b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java
@@ -164,11 +164,12 @@ public class AvroFlattenerMakerTest
final AvroFlattenerMaker flattenerNested = new AvroFlattenerMaker(false, false, true, true);
SomeAvroDatum input = AvroStreamInputRowParserTest.buildSomeAvroDatum();
-
+ // isFieldPrimitive on someStringArray is false
+ // as it contains items as nulls and strings
+ // so flattenerNested should only be able to discover it
Assert.assertEquals(
ImmutableSet.of(
"someOtherId",
- "someStringArray",
"someIntArray",
"someFloat",
"eventType",
@@ -210,6 +211,49 @@ public class AvroFlattenerMakerTest
);
}
+
+ @Test
+ public void testNullsInStringArray()
+ {
+ final AvroFlattenerMaker flattenerNested = new AvroFlattenerMaker(false, false, true, true);
+
+ SomeAvroDatum input = AvroStreamInputRowParserTest.buildSomeAvroDatum();
+
+ Assert.assertEquals(
+ ImmutableSet.of(
+ "someStringValueMap",
+ "someOtherId",
+ "someStringArray",
+ "someIntArray",
+ "someFloat",
+ "isValid",
+ "someIntValueMap",
+ "eventType",
+ "someFixed",
+ "someBytes",
+ "someRecord",
+ "someMultiMemberUnion",
+ "someNull",
+ "someRecordArray",
+ "someUnion",
+ "id",
+ "someEnum",
+ "someLong",
+ "someInt",
+ "timestamp"
+ ),
+ ImmutableSet.copyOf(flattenerNested.discoverRootFields(input))
+ );
+
+ ArrayList<Object> results = (ArrayList<Object>) flattenerNested.getRootField(input, "someStringArray");
+ // 4 strings a 1 null for a total of 5
+ Assert.assertEquals("8", results.get(0).toString());
+ Assert.assertEquals("4", results.get(1).toString());
+ Assert.assertEquals("2", results.get(2).toString());
+ Assert.assertEquals("1", results.get(3).toString());
+ Assert.assertEquals(null, results.get(4));
+ }
+
private void getRootField_common(final SomeAvroDatum record, final AvroFlattenerMaker flattener)
{
Assert.assertEquals(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org