You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by mt...@apache.org on 2019/02/28 10:52:12 UTC

[nifi] branch master updated: NIFI-6088: Widen type inference for BIGINT and DOUBLE

This is an automated email from the ASF dual-hosted git repository.

mthomsen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/master by this push:
     new e5fa18d  NIFI-6088: Widen type inference for BIGINT and DOUBLE
e5fa18d is described below

commit e5fa18d63cda60f0b10a8a1a2abff1cf69e976bd
Author: Matthew Burgess <ma...@apache.org>
AuthorDate: Wed Feb 27 20:25:25 2019 -0500

    NIFI-6088: Widen type inference for BIGINT and DOUBLE
    
    This closes #3342
    
    Signed-off-by: Mike Thomsen <mi...@gmail.com>
---
 .../nifi-record-serialization-services/pom.xml     |  1 +
 .../org/apache/nifi/json/JsonSchemaInference.java  |  6 +-
 .../apache/nifi/json/TestJsonSchemaInference.java  | 84 ++++++++++++++++++++++
 .../src/test/resources/json/data-types.json        | 24 +++++++
 4 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
index d65f471..27d4da7 100755
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
@@ -152,6 +152,7 @@
                         <exclude>src/test/resources/json/bank-account-multiarray.json</exclude>
                         <exclude>src/test/resources/json/bank-account-multiline.json</exclude>
                         <exclude>src/test/resources/json/bank-account-oneline.json</exclude>
+                        <exclude>src/test/resources/json/data-types.json</exclude>
                         <exclude>src/test/resources/json/json-with-unicode.json</exclude>
                         <exclude>src/test/resources/json/primitive-type-array.json</exclude>
                         <exclude>src/test/resources/json/single-bank-account.json</exclude>
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
index b09c79f..02587cc 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
@@ -56,13 +56,13 @@ public class JsonSchemaInference extends HierarchicalSchemaInference<JsonNode> {
         }
 
         if (jsonNode.isIntegralNumber()) {
+            if (jsonNode.isBigInteger()) {
+                return RecordFieldType.BIGINT.getDataType();
+            }
             return RecordFieldType.LONG.getDataType();
         }
 
         if (jsonNode.isFloatingPointNumber()) {
-            return RecordFieldType.FLOAT.getDataType();
-        }
-        if (jsonNode.isDouble()) {
             return RecordFieldType.DOUBLE.getDataType();
         }
         if (jsonNode.isBinary()) {
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java
new file mode 100644
index 0000000..0e50764
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.json;
+
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.schema.inference.InferSchemaAccessStrategy;
+import org.apache.nifi.schema.inference.TimeValueInference;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+
+public class TestJsonSchemaInference {
+
+    private final TimeValueInference timestampInference = new TimeValueInference("yyyy-MM-dd", "HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+
+    @Test
+    public void testInferenceIncludesAllRecords() throws IOException {
+        final File file = new File("src/test/resources/json/data-types.json");
+
+        final RecordSchema schema;
+        try (final InputStream in = new FileInputStream(file);
+             final InputStream bufferedIn = new BufferedInputStream(in)) {
+
+            final InferSchemaAccessStrategy<?> accessStrategy = new InferSchemaAccessStrategy<>(
+                    (var, content) -> new JsonRecordSource(content),
+                    new JsonSchemaInference(timestampInference), Mockito.mock(ComponentLog.class));
+            schema = accessStrategy.getSchema(null, bufferedIn, null);
+        }
+
+        assertSame(RecordFieldType.STRING, schema.getDataType("varcharc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("uuid").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("tinyintc").get().getFieldType());
+        assertSame(RecordFieldType.STRING, schema.getDataType("textc").get().getFieldType());
+        assertEquals(RecordFieldType.DATE.getDataType("yyyy-MM-dd"), schema.getDataType("datec").get());
+        assertSame(RecordFieldType.LONG, schema.getDataType("smallintc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("mediumintc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("intc").get().getFieldType());
+        assertSame(RecordFieldType.BIGINT, schema.getDataType("bigintc").get().getFieldType());
+        assertSame(RecordFieldType.DOUBLE, schema.getDataType("floatc").get().getFieldType());
+        assertSame(RecordFieldType.DOUBLE, schema.getDataType("doublec").get().getFieldType());
+        assertSame(RecordFieldType.DOUBLE, schema.getDataType("decimalc").get().getFieldType());
+        assertEquals(RecordFieldType.TIMESTAMP.getDataType("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), schema.getDataType("timestampc").get());
+        assertEquals(RecordFieldType.TIME.getDataType("HH:mm:ss"), schema.getDataType("timec").get());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("charc").get());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("tinytextc").get());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("blobc").get());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("mediumtextc").get());
+        assertSame(RecordFieldType.LONG, schema.getDataType("enumc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("setc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("boolc").get().getFieldType());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("binaryc").get());
+
+        final List<String> fieldNames = schema.getFieldNames();
+        assertEquals(Arrays.asList("varcharc", "uuid", "tinyintc", "textc", "datec", "smallintc", "mediumintc", "intc", "bigintc",
+                "floatc", "doublec", "decimalc", "timestampc", "timec", "charc", "tinytextc", "blobc", "mediumtextc", "enumc", "setc", "boolc", "binaryc"), fieldNames);
+    }
+
+}
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json
new file mode 100644
index 0000000..343288e
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json
@@ -0,0 +1,24 @@
+[{
+  "varcharc": "Nam penatibus in neque.",
+  "uuid": 1,
+  "tinyintc": -81,
+  "textc": "A faucibus volutpat placerat euismod mollis, quis semper quis ultrices aliquam massa vestibulum a lacus hendrerit turpis nullam, tincidunt ullamcorper ad ridiculus habitasse tristique vivamus elit.  Ac id montes erat accumsan rhoncus consectetur leo condimentum.\n\nConubia lectus et viverra taciti, mollis molestie phasellus, fermentum accumsan sem nisi sit dapibus interdum ridiculus blandit blandit.  Volutpat nullam orci cras.  Justo nullam penatibus non fusce vivamus integer [...]
+  "datec": "2019-02-27",
+  "smallintc": -8423,
+  "mediumintc": 6008538,
+  "intc": -1130599020,
+  "bigintc": 171234567890123456789,
+  "floatc": 182.33,
+  "doublec": 149.67382865705562,
+  "decimalc": 109.88,
+  "timestampc": "2019-02-27T20:40:53.000Z",
+  "timec": "20:40:53",
+  "charc": "DBDDGpPz",
+  "tinytextc": "hgFuypClmWWMNsDXEFJJOhdsljdBP",
+  "blobc": "Wc5YvvF8fUsOgejKPsOa",
+  "mediumtextc": "Torquent aliquet malesuada adipiscing, eget himenaeos facilisi ridiculus eros netus, nisi semper eleifend dolor nisi sapien phasellus luctus libero aenean suscipit pulvinar, lacus posuere id hendrerit feugiat vitae purus ac blandit euismod pharetra.  Adipiscing lectus primis eros pellentesque porta blandit dictum fermentum lectus tortor nam, fusce est dis class ornare neque est enim quisque a.\n\nScelerisque aptent etiam non imperdiet volutpat.  Quisque est fusce purus  [...]
+  "enumc": 1,
+  "setc": 4,
+  "boolc": 0,
+  "binaryc": "ehynfnybBfxmxgkMVrVt"
+}]
\ No newline at end of file