You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by jo...@apache.org on 2021/03/09 21:32:50 UTC
[nifi] 01/06: NIFI-7969: ValidateRecord enhanced with Force Types
From Schema property
This is an automated email from the ASF dual-hosted git repository.
joewitt pushed a commit to branch support/nifi-1.13
in repository https://gitbox.apache.org/repos/asf/nifi.git
commit 4d64a71cd94361f390fb67bf9c4ce34959b279d7
Author: Peter Gyori <pe...@gmail.com>
AuthorDate: Tue Feb 16 19:37:33 2021 +0100
NIFI-7969: ValidateRecord enhanced with Force Types From Schema property
NIFI-7969: Documentation update
Clarified that the Force Types From Schema property applies to the data read, whereas the Strict Type Checking property applies to the validation.
NIFI-7969: Documentation update - updated the property name in additionalDetails.html
This closes #4825.
Signed-off-by: Peter Turcsanyi <tu...@apache.org>
---
.../nifi/processors/standard/ValidateRecord.java | 25 ++-
.../additionalDetails.html | 190 +++++++++++++++++++++
2 files changed, 211 insertions(+), 4 deletions(-)
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java
index 1210eec..a952aca 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java
@@ -172,14 +172,29 @@ public class ValidateRecord extends AbstractProcessor {
static final PropertyDescriptor STRICT_TYPE_CHECKING = new PropertyDescriptor.Builder()
.name("strict-type-checking")
.displayName("Strict Type Checking")
- .description("If the incoming data has a Record where a field is not of the correct type, this property determine whether how to handle the Record. "
- + "If true, the Record will still be considered invalid. If false, the Record will be considered valid and the field will be coerced into the "
- + "correct type (if possible, according to the type coercion supported by the Record Writer).")
+ .description("If the incoming data has a Record where a field is not of the correct type, this property determines how to handle the Record. "
+ + "If true, the Record will be considered invalid. If false, the Record will be considered valid and the field will be coerced into the "
+ + "correct type (if possible, according to the type coercion supported by the Record Writer). "
+ + "This property controls how the data is validated against the validation schema.")
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.allowableValues("true", "false")
.defaultValue("true")
.required(true)
.build();
+ static final PropertyDescriptor COERCE_TYPES = new PropertyDescriptor.Builder()
+ .name("coerce-types")
+ .displayName("Force Types From Reader's Schema")
+ .description("If enabled, the processor will coerce every field to the type specified in the Reader's schema. "
+ + "If the value of a field cannot be coerced to the type, the field will be skipped (will not be read from the input data), "
+ + "thus will not appear in the output. "
+ + "If not enabled, then every field will appear in the output but their types may differ from what is "
+ + "specified in the schema. For details please see the Additional Details page of the processor's Help. "
+ + "This property controls how the data is read by the specified Record Reader.")
+ .expressionLanguageSupported(ExpressionLanguageScope.NONE)
+ .allowableValues("true", "false")
+ .defaultValue("false")
+ .required(true)
+ .build();
static final PropertyDescriptor VALIDATION_DETAILS_ATTRIBUTE_NAME = new PropertyDescriptor.Builder()
.name("validation-details-attribute-name")
.displayName("Validation Details Attribute Name")
@@ -227,6 +242,7 @@ public class ValidateRecord extends AbstractProcessor {
properties.add(SCHEMA_TEXT);
properties.add(ALLOW_EXTRA_FIELDS);
properties.add(STRICT_TYPE_CHECKING);
+ properties.add(COERCE_TYPES);
properties.add(VALIDATION_DETAILS_ATTRIBUTE_NAME);
properties.add(MAX_VALIDATION_DETAILS_LENGTH);
return properties;
@@ -282,6 +298,7 @@ public class ValidateRecord extends AbstractProcessor {
final boolean allowExtraFields = context.getProperty(ALLOW_EXTRA_FIELDS).asBoolean();
final boolean strictTypeChecking = context.getProperty(STRICT_TYPE_CHECKING).asBoolean();
+ final boolean coerceTypes = context.getProperty(COERCE_TYPES).asBoolean();
RecordSetWriter validWriter = null;
RecordSetWriter invalidWriter = null;
@@ -306,7 +323,7 @@ public class ValidateRecord extends AbstractProcessor {
try {
Record record;
- while ((record = reader.nextRecord(false, false)) != null) {
+ while ((record = reader.nextRecord(coerceTypes, false)) != null) {
final SchemaValidationResult result = validator.validate(record);
recordCount++;
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.ValidateRecord/additionalDetails.html b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.ValidateRecord/additionalDetails.html
new file mode 100644
index 0000000..c46443c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.ValidateRecord/additionalDetails.html
@@ -0,0 +1,190 @@
+<!DOCTYPE html>
+<html lang="en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<head>
+ <meta charset="utf-8"/>
+ <title>ValidateRecord</title>
+ <link rel="stylesheet" href="../../../../../css/component-usage.css" type="text/css"/>
+ <style>
+ table td:first-child {text-align: center;}
+ </style>
+
+</head>
+
+<body>
+
+<h2>Examples of the effect of the Force Types From Reader's Schema property</h2>
+
+<p>
+ The processor first reads the data from the incoming FlowFile using the specified Record Reader,
+ which uses a schema. Then, depending on the value of the Schema Access Strategy property, the processor
+ can either use the reader's schema, or a different schema to validate the data against.
+ After that, the processor writes the data into the outgoing FlowFile using the specified
+ Record Writer. If the data is valid, the validation schema is used by the writer.
+ If the data is invalid, the writer uses the reader's schema.
+ The <b>Force Types From Reader's Schema</b> property affects the first step:
+ how strictly the reader's schema should be applied when reading the data from the incoming FlowFile.
+ By affecting how the data is read, the value of the Force Types From Reader's Schema property also has an effect on what
+ the output of the ValidateRecord processor is,
+ and also whether the output is forwarded to the <b>valid</b> or the <b>invalid</b> relationship.
+ Below are two examples where the value of this property affects the output significantly.
+</p>
+
+<p>
+ In both examples the input is in XML format and the output is in JSON.
+ In the examples we assume that the same schema is used for reading, validation and writing.
+</p>
+
+<h3>Example 1</h3>
+
+<p>
+ Schema:
+</p>
+<pre><code>{
+ "namespace": "nifi",
+ "name": "test",
+ "type": "record",
+ "fields": [
+ { "name": "field1", "type": "string" },
+ { "name": "field2", "type": "string" }
+ ]
+}
+</code></pre>
+
+<p>
+ Input:
+</p>
+<pre><code>&lt;test&gt;
+ &lt;field1&gt;
+ &lt;sub_field&gt;content&lt;/sub_field&gt;
+ &lt;/field1&gt;
+ &lt;field2&gt;content_of_field_2&lt;/field2&gt;
+&lt;/test&gt;</code></pre>
+
+<p>
+ Output if <b>Force Types From Reader's Schema = true</b>
+ (forwarded to the <b>invalid</b> relationship):
+</p>
+<pre><code>[ {
+ "field2" : "content_of_field_2"
+} ]</code></pre>
+
+<p>
+ Output if <b>Force Types From Reader's Schema = false</b>
+ (forwarded to the <b>invalid</b> relationship):
+</p>
+<pre><code>[ {
+ "field1" : {
+ "sub_field" : "content"
+ },
+ "field2" : "content_of_field_2"
+} ]</code></pre>
+
+<p>
+ As you can see, the FlowFile is forwarded to the invalid relationship in both cases,
+ since the input data does not match the provided Avro schema.
+ However, if <b>Force Types From Reader's Schema = true</b>, only those fields appear in the output
+ that comply with the schema. If <b>Force Types From Reader's Schema = false</b>, all fields appear
+ in the output regardless of whether they comply with the schema or not.
+</p>
+
+<h3>Example 2</h3>
+
+<p>
+ Schema:
+</p>
+<pre><code>{
+ "namespace": "nifi",
+ "name": "test",
+ "type": "record",
+ "fields": [
+ {
+ "name": "field1",
+ "type": {
+ "type": "array",
+ "items": "string"
+ }
+ },
+ {
+ "name": "field2",
+ "type": {
+ "type": "array",
+ "items": "string"
+ }
+ }
+ ]
+}</code></pre>
+
+<p>
+ Input:
+</p>
+<pre><code>&lt;test&gt;
+ &lt;field1&gt;content_1&lt;/field1&gt;
+ &lt;field2&gt;content_2&lt;/field2&gt;
+ &lt;field2&gt;content_3&lt;/field2&gt;
+&lt;/test&gt;</code></pre>
+
+<p>
+ Output if <b>Force Types From Reader's Schema = true</b>
+ (forwarded to the <b>valid</b> relationship):
+</p>
+<pre><code>[ {
+ "field1" : [ "content_1" ],
+ "field2" : [ "content_2", "content_3" ]
+} ]</code></pre>
+
+<p>
+ Output if <b>Force Types From Reader's Schema = false</b>
+ (forwarded to the <b>invalid</b> relationship):
+</p>
+<pre><code>[ {
+ "field1" : "content_1",
+ "field2" : [ "content_2", "content_3" ]
+} ]</code></pre>
+
+<p>
+ The schema expects two fields (field1 and field2), both of type ARRAY.
+ field1 only appears once in the input XML document. If <b>Force Types From Reader's Schema = true</b>,
+ the processor forces this field to be in a type that complies with the schema.
+ So it is put in an array with one element. Since this type coercion can be done,
+ the output is routed to the <b>valid</b> relationship.
+ If <b>Force Types From Reader's Schema = false</b> the processor does not try to apply
+ type coercion, thus field1 appears in the output as a single value. According to the schema,
+ the processor expects an array for field1, but receives a single element so the output
+ is routed to the <b>invalid</b> relationship.
+</p>
+<p>
+ Schema compliance (and getting routed to the <b>valid</b> or the <b>invalid</b> relationship)
+ does not depend on what Writer is used to produce the output of the ValidateRecord processor.
+ Let us suppose that we used the same schema and input as in <b>Example 2</b>, but instead of
+ JsonRecordSetWriter, we used XMLRecordSetWriter to produce the output.
+ In both cases (<b>Force Types From Reader's Schema = true</b> and <b>Force Types From Reader's Schema = false</b>),
+ the output is:
+</p>
+<pre><code>&lt;test&gt;
+ &lt;field1&gt;content_1&lt;/field1&gt;
+ &lt;field2&gt;content_2&lt;/field2&gt;
+ &lt;field2&gt;content_3&lt;/field2&gt;
+&lt;/test&gt;</code></pre>
+
+<p>
+ However, if <b>Force Types From Reader's Schema = true</b> this output is routed to the <b>valid</b>
+ relationship and if <b>Force Types From Reader's Schema = false</b> it is routed to the <b>invalid</b>
+ relationship.
+</p>
+
+</body>
+</html>
\ No newline at end of file