Posted to commits@nifi.apache.org by bb...@apache.org on 2018/06/13 18:33:21 UTC
[1/6] nifi git commit: NIFI-4963: Added Hive3 bundle - Incorporated review comments - Added more defensive code for PutHive3Streaming error handling
Repository: nifi
Updated Branches:
refs/heads/master 8feac9ae5 -> da99f873a
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/util/orc/TestNiFiOrcUtils.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/util/orc/TestNiFiOrcUtils.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/util/orc/TestNiFiOrcUtils.java
new file mode 100644
index 0000000..4682d76
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/util/orc/TestNiFiOrcUtils.java
@@ -0,0 +1,437 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.util.orc;
+
+
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.util.Utf8;
+import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObject;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Unit tests for the NiFiOrcUtils helper class
+ */
+public class TestNiFiOrcUtils {
+
+ @Test
+ public void test_getOrcField_primitive() {
+ // Expected ORC types
+ TypeInfo[] expectedTypes = {
+ TypeInfoFactory.getPrimitiveTypeInfo("int"),
+ TypeInfoFactory.getPrimitiveTypeInfo("bigint"),
+ TypeInfoFactory.getPrimitiveTypeInfo("boolean"),
+ TypeInfoFactory.getPrimitiveTypeInfo("float"),
+ TypeInfoFactory.getPrimitiveTypeInfo("double"),
+ TypeInfoFactory.getPrimitiveTypeInfo("binary"),
+ TypeInfoFactory.getPrimitiveTypeInfo("string")
+ };
+
+ // Build a fake Avro record with all types
+ Schema testSchema = buildPrimitiveAvroSchema();
+ List<Schema.Field> fields = testSchema.getFields();
+ for (int i = 0; i < fields.size(); i++) {
+ assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).schema(), false));
+ }
+
+ }
+
+ @Test
+ public void test_getOrcField_union_optional_type() {
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
+ builder.name("union").type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
+ Schema testSchema = builder.endRecord();
+ TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema(), false);
+ assertEquals(TypeInfoCreator.createBoolean(), orcType);
+ }
+
+ @Test
+ public void test_getOrcField_union() {
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
+ builder.name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault();
+ Schema testSchema = builder.endRecord();
+ TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema(), false);
+ assertEquals(
+ TypeInfoFactory.getUnionTypeInfo(Arrays.asList(
+ TypeInfoCreator.createInt(),
+ TypeInfoCreator.createBoolean())),
+ orcType);
+ }
+
+ @Test
+ public void test_getOrcField_map() {
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
+ builder.name("map").type().map().values().doubleType().noDefault();
+ Schema testSchema = builder.endRecord();
+ TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema(), true);
+ assertEquals(
+ TypeInfoFactory.getMapTypeInfo(
+ TypeInfoCreator.createString(),
+ TypeInfoCreator.createDouble()),
+ orcType);
+ }
+
+ @Test
+ public void test_getOrcField_nested_map() {
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
+ builder.name("map").type().map().values().map().values().doubleType().noDefault();
+ Schema testSchema = builder.endRecord();
+ TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema(), false);
+ assertEquals(
+ TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(),
+ TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble())),
+ orcType);
+ }
+
+ @Test
+ public void test_getOrcField_array() {
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
+ builder.name("array").type().array().items().longType().noDefault();
+ Schema testSchema = builder.endRecord();
+ TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema(), false);
+ assertEquals(
+ TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createLong()),
+ orcType);
+ }
+
+ @Test
+ public void test_getOrcField_complex_array() {
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
+ builder.name("Array").type().array().items().map().values().floatType().noDefault();
+ Schema testSchema = builder.endRecord();
+ TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("Array").schema(), true);
+ assertEquals(
+ TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createFloat())),
+ orcType);
+ }
+
+ @Test
+ public void test_getOrcField_record() {
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
+ builder.name("Int").type().intType().noDefault();
+ builder.name("Long").type().longType().longDefault(1L);
+ builder.name("Array").type().array().items().stringType().noDefault();
+ Schema testSchema = builder.endRecord();
+ // Normalize field names for Hive, assert that their names are now lowercase
+ TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema, true);
+ assertEquals(
+ TypeInfoFactory.getStructTypeInfo(
+ Arrays.asList("int", "long", "array"),
+ Arrays.asList(
+ TypeInfoCreator.createInt(),
+ TypeInfoCreator.createLong(),
+ TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))),
+ orcType);
+ }
+
+ @Test
+ public void test_getOrcField_enum() {
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
+ builder.name("enumField").type().enumeration("enum").symbols("a", "b", "c").enumDefault("a");
+ Schema testSchema = builder.endRecord();
+ TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("enumField").schema(), true);
+ assertEquals(TypeInfoCreator.createString(), orcType);
+ }
+
+ @Test
+ public void test_getPrimitiveOrcTypeFromPrimitiveAvroType() {
+ // Expected ORC types
+ TypeInfo[] expectedTypes = {
+ TypeInfoCreator.createInt(),
+ TypeInfoCreator.createLong(),
+ TypeInfoCreator.createBoolean(),
+ TypeInfoCreator.createFloat(),
+ TypeInfoCreator.createDouble(),
+ TypeInfoCreator.createBinary(),
+ TypeInfoCreator.createString(),
+ };
+
+ Schema testSchema = buildPrimitiveAvroSchema();
+ List<Schema.Field> fields = testSchema.getFields();
+ for (int i = 0; i < fields.size(); i++) {
+ assertEquals(expectedTypes[i], NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(fields.get(i).schema().getType()));
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void test_getPrimitiveOrcTypeFromPrimitiveAvroType_badType() {
+ Schema.Type nonPrimitiveType = Schema.Type.ARRAY;
+ NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(nonPrimitiveType);
+ }
+
+ @Test
+ public void test_getWritable() throws Exception {
+ assertTrue(NiFiOrcUtils.convertToORCObject(null, 1, true) instanceof IntWritable);
+ assertTrue(NiFiOrcUtils.convertToORCObject(null, 1L, true) instanceof LongWritable);
+ assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0f, true) instanceof FloatWritable);
+ assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0, true) instanceof DoubleWritable);
+ assertTrue(NiFiOrcUtils.convertToORCObject(null, new int[]{1, 2, 3}, true) instanceof List);
+ assertTrue(NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3), true) instanceof List);
+ Map<String, Float> map = new HashMap<>();
+ map.put("Hello", 1.0f);
+ map.put("World", 2.0f);
+
+ Object convMap = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), map, true);
+ assertTrue(convMap instanceof Map);
+ ((Map) convMap).forEach((key, value) -> {
+ assertTrue(key instanceof Text);
+ assertTrue(value instanceof FloatWritable);
+ });
+ }
+
+ @Test
+ public void test_getHiveTypeFromAvroType_primitive() {
+ // Expected Hive types
+ String[] expectedTypes = {
+ "INT",
+ "BIGINT",
+ "BOOLEAN",
+ "FLOAT",
+ "DOUBLE",
+ "BINARY",
+ "STRING",
+ };
+
+ Schema testSchema = buildPrimitiveAvroSchema();
+ List<Schema.Field> fields = testSchema.getFields();
+ for (int i = 0; i < fields.size(); i++) {
+ assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema(), false));
+ }
+ }
+
+ @Test
+ public void test_getHiveTypeFromAvroType_complex() {
+ // Expected Hive types
+ String[] expectedTypes = {
+ "INT",
+ "MAP<STRING, DOUBLE>",
+ "STRING",
+ "UNIONTYPE<BIGINT, FLOAT>",
+ "ARRAY<INT>"
+ };
+
+ Schema testSchema = buildComplexAvroSchema();
+ List<Schema.Field> fields = testSchema.getFields();
+ for (int i = 0; i < fields.size(); i++) {
+ assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema(), false));
+ }
+
+ assertEquals("STRUCT<myInt:INT, myMap:MAP<STRING, DOUBLE>, myEnum:STRING, myLongOrFloat:UNIONTYPE<BIGINT, FLOAT>, myIntList:ARRAY<INT>>",
+ NiFiOrcUtils.getHiveTypeFromAvroType(testSchema, false));
+ }
+
+ @Test
+ public void test_generateHiveDDL_primitive() {
+ Schema avroSchema = buildPrimitiveAvroSchema();
+ String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable", false);
+ assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
+ + " STORED AS ORC", ddl);
+ }
+
+ @Test
+ public void test_generateHiveDDL_complex() {
+ Schema avroSchema = buildComplexAvroSchema();
+ String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable", false);
+ assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable "
+ + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
+ + " STORED AS ORC", ddl);
+ }
+
+ @Test
+ public void test_generateHiveDDL_complex_normalize() {
+ Schema avroSchema = buildComplexAvroSchema();
+ String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable", true);
+ assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable "
+ + "(myint INT, mymap MAP<STRING, DOUBLE>, myenum STRING, mylongorfloat UNIONTYPE<BIGINT, FLOAT>, myintlist ARRAY<INT>)"
+ + " STORED AS ORC", ddl);
+ }
+
+ @Test
+ public void test_convertToORCObject() {
+ Schema schema = SchemaBuilder.enumeration("myEnum").symbols("x", "y", "z");
+ List<Object> objects = Arrays.asList(new Utf8("Hello"), new GenericData.EnumSymbol(schema, "x"));
+ objects.forEach((avroObject) -> {
+ Object o = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,string>"), avroObject, true);
+ assertTrue(o instanceof UnionObject);
+ UnionObject uo = (UnionObject) o;
+ assertTrue(uo.getObject() instanceof Text);
+ });
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void test_convertToORCObjectBadUnion() {
+ NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,long>"), "Hello", true);
+ }
+
+ @Test
+ public void test_getHiveTypeFromAvroType_complex_normalize() {
+ // Expected Hive types
+ String[] expectedTypes = {
+ "INT",
+ "MAP<STRING, DOUBLE>",
+ "STRING",
+ "UNIONTYPE<BIGINT, FLOAT>",
+ "ARRAY<INT>"
+ };
+
+ Schema testSchema = buildComplexAvroSchema();
+ List<Schema.Field> fields = testSchema.getFields();
+ for (int i = 0; i < fields.size(); i++) {
+ assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema(), true));
+ }
+
+ assertEquals("STRUCT<myint:INT, mymap:MAP<STRING, DOUBLE>, myenum:STRING, mylongorfloat:UNIONTYPE<BIGINT, FLOAT>, myintlist:ARRAY<INT>>",
+ NiFiOrcUtils.getHiveTypeFromAvroType(testSchema, true));
+ }
+
+ //////////////////
+ // Helper methods
+ //////////////////
+
+ public static Schema buildPrimitiveAvroSchema() {
+ // Build a fake Avro record with all primitive types
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("test.record").namespace("any.data").fields();
+ builder.name("int").type().intType().noDefault();
+ builder.name("long").type().longType().longDefault(1L);
+ builder.name("boolean").type().booleanType().booleanDefault(true);
+ builder.name("float").type().floatType().floatDefault(0.0f);
+ builder.name("double").type().doubleType().doubleDefault(0.0);
+ builder.name("bytes").type().bytesType().noDefault();
+ builder.name("string").type().stringType().stringDefault("default");
+ return builder.endRecord();
+ }
+
+ public static GenericData.Record buildPrimitiveAvroRecord(int i, long l, boolean b, float f, double d, ByteBuffer bytes, String string) {
+ Schema schema = buildPrimitiveAvroSchema();
+ GenericData.Record row = new GenericData.Record(schema);
+ row.put("int", i);
+ row.put("long", l);
+ row.put("boolean", b);
+ row.put("float", f);
+ row.put("double", d);
+ row.put("bytes", bytes);
+ row.put("string", string);
+ return row;
+ }
+
+ public static TypeInfo buildPrimitiveOrcSchema() {
+ return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
+ Arrays.asList(
+ TypeInfoCreator.createInt(),
+ TypeInfoCreator.createLong(),
+ TypeInfoCreator.createBoolean(),
+ TypeInfoCreator.createFloat(),
+ TypeInfoCreator.createDouble(),
+ TypeInfoCreator.createBinary(),
+ TypeInfoCreator.createString()));
+ }
+
+ public static Schema buildComplexAvroSchema() {
+ // Build a fake Avro record with nested types
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("complex.record").namespace("any.data").fields();
+ builder.name("myInt").type().unionOf().nullType().and().intType().endUnion().nullDefault();
+ builder.name("myMap").type().map().values().doubleType().noDefault();
+ builder.name("myEnum").type().enumeration("myEnum").symbols("ABC", "DEF", "XYZ").enumDefault("ABC");
+ builder.name("myLongOrFloat").type().unionOf().longType().and().floatType().endUnion().noDefault();
+ builder.name("myIntList").type().array().items().intType().noDefault();
+ return builder.endRecord();
+ }
+
+ public static GenericData.Record buildComplexAvroRecord(Integer i, Map<String, Double> m, String e, Object unionVal, List<Integer> intArray) {
+ Schema schema = buildComplexAvroSchema();
+ GenericData.Record row = new GenericData.Record(schema);
+ row.put("myInt", i);
+ row.put("myMap", m);
+ row.put("myEnum", e);
+ row.put("myLongOrFloat", unionVal);
+ row.put("myIntList", intArray);
+ return row;
+ }
+
+ public static TypeInfo buildComplexOrcSchema() {
+ return TypeInfoUtils.getTypeInfoFromTypeString("struct<myInt:int,myMap:map<string,double>,myEnum:string,myLongOrFloat:uniontype<int>,myIntList:array<int>>");
+ }
+
+ public static Schema buildNestedComplexAvroSchema() {
+ // Build a fake Avro record with nested complex types
+ final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("nested.complex.record").namespace("any.data").fields();
+ builder.name("myMapOfArray").type().map().values().array().items().doubleType().noDefault();
+ builder.name("myArrayOfMap").type().array().items().map().values().stringType().noDefault();
+ return builder.endRecord();
+ }
+
+ public static GenericData.Record buildNestedComplexAvroRecord(Map<String, List<Double>> m, List<Map<String, String>> a) {
+ Schema schema = buildNestedComplexAvroSchema();
+ GenericData.Record row = new GenericData.Record(schema);
+ row.put("myMapOfArray", m);
+ row.put("myArrayOfMap", a);
+ return row;
+ }
+
+ public static TypeInfo buildNestedComplexOrcSchema() {
+ return TypeInfoUtils.getTypeInfoFromTypeString("struct<myMapOfArray:map<string,array<double>>,myArrayOfMap:array<map<string,string>>>");
+ }
+
+ private static class TypeInfoCreator {
+ static TypeInfo createInt() {
+ return TypeInfoFactory.getPrimitiveTypeInfo("int");
+ }
+
+ static TypeInfo createLong() {
+ return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
+ }
+
+ static TypeInfo createBoolean() {
+ return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
+ }
+
+ static TypeInfo createFloat() {
+ return TypeInfoFactory.getPrimitiveTypeInfo("float");
+ }
+
+ static TypeInfo createDouble() {
+ return TypeInfoFactory.getPrimitiveTypeInfo("double");
+ }
+
+ static TypeInfo createBinary() {
+ return TypeInfoFactory.getPrimitiveTypeInfo("binary");
+ }
+
+ static TypeInfo createString() {
+ return TypeInfoFactory.getPrimitiveTypeInfo("string");
+ }
+ }
+}
\ No newline at end of file
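For context: the helpers exercised above map Avro schemas to Hive TypeInfo objects and DDL strings. A minimal sketch of how they compose (hypothetical standalone code, assuming the test helpers above are on the classpath; the class and table names are illustrative):

    package org.apache.nifi.util.orc;

    import org.apache.avro.Schema;
    import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

    public class OrcMappingSketch {
        public static void main(String[] args) {
            // The all-primitives record schema built by the tests above
            Schema avroSchema = TestNiFiOrcUtils.buildPrimitiveAvroSchema();

            // Avro record schema -> Hive struct TypeInfo (field names not normalized)
            TypeInfo orcType = NiFiOrcUtils.getOrcField(avroSchema, false);
            System.out.println(orcType);

            // Avro record schema -> DDL for an external ORC-backed Hive table,
            // e.g. "CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable (...) STORED AS ORC"
            String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable", false);
            System.out.println(ddl);
        }
    }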
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/array_of_records.avsc
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/array_of_records.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/array_of_records.avsc
new file mode 100644
index 0000000..1e5154c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/array_of_records.avsc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ {
+ "namespace" : "org.apache.nifi",
+ "name" : "outer_record",
+ "type" : "record",
+ "fields" : [ {
+ "name" : "records",
+ "type" : {
+ "type" : "array",
+ "items" : {
+ "type" : "record",
+ "name" : "inner_record",
+ "fields" : [ {
+ "name" : "name",
+ "type" : "string"
+ }, {
+ "name" : "age",
+ "type" : "string"
+ } ]
+ }
+ }
+ } ]
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/core-site-security.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/core-site-security.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/core-site-security.xml
new file mode 100644
index 0000000..eefc74e
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/core-site-security.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <property>
+ <name>fs.default.name</name>
+ <value>hdfs://hive</value>
+ </property>
+ <property>
+ <name>hadoop.security.authentication</name>
+ <value>kerberos</value>
+ </property>
+ <property>
+ <name>hadoop.security.authorization</name>
+ <value>true</value>
+ </property>
+</configuration>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/core-site.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/core-site.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/core-site.xml
new file mode 100644
index 0000000..8a7d178
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/core-site.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <property>
+ <name>fs.default.name</name>
+ <value>file:///</value>
+ </property>
+</configuration>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/fake.keytab
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/fake.keytab b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/fake.keytab
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/hive-site-security.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/hive-site-security.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/hive-site-security.xml
new file mode 100644
index 0000000..07fd74c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/hive-site-security.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <property>
+ <name>fs.default.name</name>
+ <value>hdfs://hive</value>
+ </property>
+ <property>
+ <name>hive.server2.authentication</name>
+ <value>KERBEROS</value>
+ </property>
+</configuration>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/hive-site.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/hive-site.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/hive-site.xml
new file mode 100644
index 0000000..7e7f86c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/hive-site.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <property>
+ <name>fs.default.name</name>
+ <value>file:///</value>
+ </property>
+</configuration>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/krb5.conf
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/krb5.conf b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/krb5.conf
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/user.avsc
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/user.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/user.avsc
new file mode 100644
index 0000000..95ef6e4
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/user.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "name", "type": "string"},
+ {"name": "favorite_number", "type": ["int", "null"]},
+ {"name": "favorite_color", "type": ["string", "null"]},
+ {"name": "scale", "type": ["double", "null"]}
+ ]
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/user_logical_types.avsc
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/user_logical_types.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/user_logical_types.avsc
new file mode 100644
index 0000000..054a2f2
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/resources/user_logical_types.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "id", "type": ["null","int"]},
+ {"name": "timeMillis","type": {"type": "int","logicalType": "time-millis"}},
+ {"name": "timestampMillis","type": {"type": "long","logicalType": "timestamp-millis"}},
+ {"name": "dt","type": {"type": "int","logicalType": "date"}},
+ {"name": "dec","type": {"type": "bytes","logicalType": "decimal", "precision": 4, "scale": 2}}
+ ]
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/pom.xml
index fc7a751..51c9640 100644
--- a/nifi-nar-bundles/nifi-hive-bundle/pom.xml
+++ b/nifi-nar-bundles/nifi-hive-bundle/pom.xml
@@ -26,63 +26,22 @@
<version>1.7.0-SNAPSHOT</version>
<packaging>pom</packaging>
- <properties>
- <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
- <hive.hadoop.version>2.6.2</hive.hadoop.version>
- <hadoop.version>${hive.hadoop.version}</hadoop.version>
- </properties>
-
<modules>
<module>nifi-hive-services-api</module>
<module>nifi-hive-services-api-nar</module>
<module>nifi-hive-processors</module>
<module>nifi-hive-nar</module>
+ <module>nifi-hive3-processors</module>
+ <module>nifi-hive3-nar</module>
</modules>
- <dependencyManagement>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-annotations</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-auth</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <version>1.7.7</version>
- </dependency>
- </dependencies>
- </dependencyManagement>
+ <properties>
+ <hive.version>1.2.1</hive.version>
+ <hive.hadoop.version>2.6.2</hive.hadoop.version>
+ <hive3.version>3.0.0</hive3.version>
+ <hive3.hadoop.version>3.0.0</hive3.hadoop.version>
+ <orc.version>1.4.3</orc.version>
+ </properties>
<build>
<plugins>
[5/6] nifi git commit: NIFI-4963: Added Hive3 bundle - Incorporated review comments - Added more defensive code for PutHive3Streaming error handling
Posted by bb...@apache.org.
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive3ConnectionPool.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive3ConnectionPool.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive3ConnectionPool.java
new file mode 100644
index 0000000..b0662b8
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive3ConnectionPool.java
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.dbcp.hive;
+
+import java.io.File;
+
+import org.apache.commons.dbcp.BasicDataSource;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hive.jdbc.HiveDriver;
+import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnDisabled;
+import org.apache.nifi.annotation.lifecycle.OnEnabled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.controller.AbstractControllerService;
+import org.apache.nifi.controller.ConfigurationContext;
+import org.apache.nifi.expression.ExpressionLanguageScope;
+import org.apache.nifi.hadoop.KerberosProperties;
+import org.apache.nifi.hadoop.SecurityUtil;
+import org.apache.nifi.kerberos.KerberosCredentialsService;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.reporting.InitializationException;
+import org.apache.nifi.util.hive.AuthenticationFailedException;
+import org.apache.nifi.util.hive.HiveConfigurator;
+import org.apache.nifi.util.hive.HiveUtils;
+import org.apache.nifi.util.hive.ValidationResources;
+
+import java.io.IOException;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.security.PrivilegedExceptionAction;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.nifi.controller.ControllerServiceInitializationContext;
+
+/**
+ * Implementation for Database Connection Pooling Service used for Apache Hive
+ * connections. Apache DBCP is used for connection pooling functionality.
+ */
+@RequiresInstanceClassLoading
+@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"})
+@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive 3.x. Connections can be requested from the pool and returned after use.")
+public class Hive3ConnectionPool extends AbstractControllerService implements Hive3DBCPService {
+ private static final String ALLOW_EXPLICIT_KEYTAB = "NIFI_ALLOW_EXPLICIT_KEYTAB";
+
+ static final PropertyDescriptor DATABASE_URL = new PropertyDescriptor.Builder()
+ .name("hive-db-connect-url")
+ .displayName("Database Connection URL")
+ .description("A database connection URL used to connect to a database. May contain database system name, host, port, database name and some parameters."
+ + " The exact syntax of a database connection URL is specified by the Hive documentation. For example, the server principal is often included "
+ + "as a connection parameter when connecting to a secure Hive server.")
+ .defaultValue(null)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .required(true)
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .build();
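+ // Illustrative only (not from the original commit): a secured HiveServer2 URL
+ // typically looks like jdbc:hive2://hive-host:10000/default;principal=hive/_HOST@EXAMPLE.COM,
+ // where the 'principal' parameter names the Hive server's Kerberos principal.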
+
+ static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder()
+ .name("hive-config-resources")
+ .displayName("Hive Configuration Resources")
+ .description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop "
+ + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication "
+ + "with Kerberos e.g., the appropriate properties must be set in the configuration files. Please see the Hive documentation for more details.")
+ .required(false)
+ .addValidator(HiveUtils.createMultipleFilesExistValidator())
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .build();
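+ // Illustrative only: a comma-separated value for this property might look like
+ //   /etc/hive/conf/hive-site.xml,/etc/hadoop/conf/core-site.xml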
+
+ static final PropertyDescriptor DB_USER = new PropertyDescriptor.Builder()
+ .name("hive-db-user")
+ .displayName("Database User")
+ .description("Database user name")
+ .defaultValue(null)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .build();
+
+ static final PropertyDescriptor DB_PASSWORD = new PropertyDescriptor.Builder()
+ .name("hive-db-password")
+ .displayName("Password")
+ .description("The password for the database user")
+ .defaultValue(null)
+ .required(false)
+ .sensitive(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .build();
+
+ static final PropertyDescriptor MAX_WAIT_TIME = new PropertyDescriptor.Builder()
+ .name("hive-max-wait-time")
+ .displayName("Max Wait Time")
+ .description("The maximum amount of time that the pool will wait (when there are no available connections) "
+ + " for a connection to be returned before failing, or -1 to wait indefinitely. ")
+ .defaultValue("500 millis")
+ .required(true)
+ .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .build();
+
+ static final PropertyDescriptor MAX_TOTAL_CONNECTIONS = new PropertyDescriptor.Builder()
+ .name("hive-max-total-connections")
+ .displayName("Max Total Connections")
+ .description("The maximum number of active connections that can be allocated from this pool at the same time, "
+ + "or negative for no limit.")
+ .defaultValue("8")
+ .required(true)
+ .addValidator(StandardValidators.INTEGER_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .build();
+
+ static final PropertyDescriptor VALIDATION_QUERY = new PropertyDescriptor.Builder()
+ .name("Validation-query")
+ .displayName("Validation query")
+ .description("Validation query used to validate connections before returning them. "
+ + "When a borrowed connection is invalid, it gets dropped and a new valid connection will be returned. "
+ + "NOTE: Using validation may have a performance penalty.")
+ .required(false)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .build();
+
+ private static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder()
+ .name("kerberos-credentials-service")
+ .displayName("Kerberos Credentials Service")
+ .description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos")
+ .identifiesControllerService(KerberosCredentialsService.class)
+ .required(false)
+ .build();
+
+
+ private List<PropertyDescriptor> properties;
+
+ private String connectionUrl = "unknown";
+
+ // Holder of cached Configuration information so validation does not reload the same config over and over
+ private final AtomicReference<ValidationResources> validationResourceHolder = new AtomicReference<>();
+
+ private volatile BasicDataSource dataSource;
+
+ private volatile HiveConfigurator hiveConfigurator = new HiveConfigurator();
+ private volatile UserGroupInformation ugi;
+ private volatile File kerberosConfigFile = null;
+ private volatile KerberosProperties kerberosProperties;
+
+ @Override
+ protected void init(final ControllerServiceInitializationContext context) {
+ List<PropertyDescriptor> props = new ArrayList<>();
+ props.add(DATABASE_URL);
+ props.add(HIVE_CONFIGURATION_RESOURCES);
+ props.add(DB_USER);
+ props.add(DB_PASSWORD);
+ props.add(MAX_WAIT_TIME);
+ props.add(MAX_TOTAL_CONNECTIONS);
+ props.add(VALIDATION_QUERY);
+ props.add(KERBEROS_CREDENTIALS_SERVICE);
+
+ kerberosConfigFile = context.getKerberosConfigurationFile();
+ kerberosProperties = new KerberosProperties(kerberosConfigFile);
+ props.add(kerberosProperties.getKerberosPrincipal());
+ props.add(kerberosProperties.getKerberosKeytab());
+ properties = props;
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return properties;
+ }
+
+ @Override
+ protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
+ boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet();
+
+ final List<ValidationResult> problems = new ArrayList<>();
+
+ if (confFileProvided) {
+ final String explicitPrincipal = validationContext.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue();
+ final String explicitKeytab = validationContext.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue();
+ final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
+
+ final String resolvedPrincipal;
+ final String resolvedKeytab;
+ if (credentialsService == null) {
+ resolvedPrincipal = explicitPrincipal;
+ resolvedKeytab = explicitKeytab;
+ } else {
+ resolvedPrincipal = credentialsService.getPrincipal();
+ resolvedKeytab = credentialsService.getKeytab();
+ }
+
+
+ final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
+ problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, validationResourceHolder, getLogger()));
+
+ if (credentialsService != null && (explicitPrincipal != null || explicitKeytab != null)) {
+ problems.add(new ValidationResult.Builder()
+ .subject("Kerberos Credentials")
+ .valid(false)
+ .explanation("Cannot specify both a Kerberos Credentials Service and a principal/keytab")
+ .build());
+ }
+
+ final String allowExplicitKeytabVariable = System.getenv(ALLOW_EXPLICIT_KEYTAB);
+ if ("false".equalsIgnoreCase(allowExplicitKeytabVariable) && (explicitPrincipal != null || explicitKeytab != null)) {
+ problems.add(new ValidationResult.Builder()
+ .subject("Kerberos Credentials")
+ .valid(false)
+ .explanation("The '" + ALLOW_EXPLICIT_KEYTAB + "' system environment variable is configured to forbid explicitly configuring principal/keytab in processors. "
+ + "The Kerberos Credentials Service should be used instead of setting the Kerberos Keytab or Kerberos Principal property.")
+ .build());
+ }
+ }
+
+ return problems;
+ }
+
+ /**
+ * Configures connection pool by creating an instance of the
+ * {@link BasicDataSource} based on configuration provided with
+ * {@link ConfigurationContext}.
+ * <p>
+ * This operation makes no guarantees that the actual connection could be
+ * made since the underlying system may still go off-line during normal
+ * operation of the connection pool.
+ * <p/>
+ * As of Apache NiFi 1.5.0, due to changes made to
+ * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which this class invokes via
+ * {@link HiveConfigurator#authenticate(Configuration, String, String)}
+ * to authenticate a principal with Kerberos, Hive controller services no longer
+ * attempt explicit relogins. For more information, please read the documentation for
+ * {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
+ * <p/>
+ * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by
+ * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive
+ * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions
+ * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same
+ * {@link UserGroupInformation} instance. One of these threads could leave the
+ * {@link javax.security.auth.Subject} in {@link UserGroupInformation} cleared or in an unexpected state
+ * while the other thread is attempting to use it, resulting in failed
+ * authentication attempts that would leave the Hive controller service in an unrecoverable state.
+ *
+ * @see SecurityUtil#loginKerberos(Configuration, String, String)
+ * @see HiveConfigurator#authenticate(Configuration, String, String)
+ * @see HiveConfigurator#authenticate(Configuration, String, String, long)
+ * @param context the configuration context
+ * @throws InitializationException if unable to create a database connection
+ */
+ @OnEnabled
+ public void onConfigured(final ConfigurationContext context) throws InitializationException {
+
+ ComponentLog log = getLogger();
+
+ final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
+ final Configuration hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles);
+ final String validationQuery = context.getProperty(VALIDATION_QUERY).evaluateAttributeExpressions().getValue();
+
+ // add any dynamic properties to the Hive configuration
+ for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
+ final PropertyDescriptor descriptor = entry.getKey();
+ if (descriptor.isDynamic()) {
+ hiveConfig.set(descriptor.getName(), context.getProperty(descriptor).evaluateAttributeExpressions().getValue());
+ }
+ }
+
+ final String drv = HiveDriver.class.getName();
+ if (SecurityUtil.isSecurityEnabled(hiveConfig)) {
+ final String explicitPrincipal = context.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue();
+ final String explicitKeytab = context.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue();
+ final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
+
+ final String resolvedPrincipal;
+ final String resolvedKeytab;
+ if (credentialsService == null) {
+ resolvedPrincipal = explicitPrincipal;
+ resolvedKeytab = explicitKeytab;
+ } else {
+ resolvedPrincipal = credentialsService.getPrincipal();
+ resolvedKeytab = credentialsService.getKeytab();
+ }
+
+ log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab});
+ try {
+ ugi = hiveConfigurator.authenticate(hiveConfig, resolvedPrincipal, resolvedKeytab);
+ } catch (AuthenticationFailedException ae) {
+ log.error(ae.getMessage(), ae);
+ // Fail service enablement rather than proceeding (and logging success) without authentication
+ throw new InitializationException(ae);
+ }
+
+ getLogger().info("Successfully logged in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab});
+ }
+
+ final String user = context.getProperty(DB_USER).evaluateAttributeExpressions().getValue();
+ final String passw = context.getProperty(DB_PASSWORD).evaluateAttributeExpressions().getValue();
+ final Long maxWaitMillis = context.getProperty(MAX_WAIT_TIME).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS);
+ final Integer maxTotal = context.getProperty(MAX_TOTAL_CONNECTIONS).evaluateAttributeExpressions().asInteger();
+
+ dataSource = new BasicDataSource();
+ dataSource.setDriverClassName(drv);
+
+ connectionUrl = context.getProperty(DATABASE_URL).evaluateAttributeExpressions().getValue();
+
+ dataSource.setMaxWait(maxWaitMillis);
+ dataSource.setMaxActive(maxTotal);
+
+ if (validationQuery != null && !validationQuery.isEmpty()) {
+ dataSource.setValidationQuery(validationQuery);
+ dataSource.setTestOnBorrow(true);
+ }
+
+ dataSource.setUrl(connectionUrl);
+ dataSource.setUsername(user);
+ dataSource.setPassword(passw);
+ }
+
+ /**
+ * Shutdown pool, close all open connections.
+ */
+ @OnDisabled
+ public void shutdown() {
+ try {
+ dataSource.close();
+ } catch (final SQLException e) {
+ throw new ProcessException(e);
+ }
+ }
+
+ @Override
+ public Connection getConnection() throws ProcessException {
+ try {
+ if (ugi != null) {
+ try {
+ return ugi.doAs((PrivilegedExceptionAction<Connection>) () -> dataSource.getConnection());
+ } catch (UndeclaredThrowableException e) {
+ Throwable cause = e.getCause();
+ if (cause instanceof SQLException) {
+ throw (SQLException) cause;
+ } else {
+ throw e;
+ }
+ }
+ } else {
+ getLogger().info("Simple Authentication");
+ return dataSource.getConnection();
+ }
+ } catch (SQLException | IOException | InterruptedException e) {
+ getLogger().error("Error getting Hive connection", e);
+ throw new ProcessException(e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "Hive3ConnectionPool[id=" + getIdentifier() + "]";
+ }
+
+ @Override
+ public String getConnectionURL() {
+ return connectionUrl;
+ }
+
+}
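For context: processors obtain JDBC connections from this pool through the Hive3DBCPService interface it implements. A minimal consumer sketch (hypothetical code; in a real processor the service reference comes from a property such as the HIVE_DBCP_SERVICE descriptor shown further below):

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.Statement;

    import org.apache.nifi.dbcp.hive.Hive3DBCPService;

    public class Hive3PoolUsageSketch {
        // In a processor, 'service' would be resolved via
        // context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive3DBCPService.class)
        static void listTables(Hive3DBCPService service) throws Exception {
            // getConnection() performs the Kerberos doAs wrapping internally (see above)
            try (Connection conn = service.getConnection();
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery("SHOW TABLES")) {
                while (rs.next()) {
                    System.out.println(rs.getString(1));
                }
            }
        }
    }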
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive3QLProcessor.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive3QLProcessor.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive3QLProcessor.java
new file mode 100644
index 0000000..4fcce19
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive3QLProcessor.java
@@ -0,0 +1,348 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.hive;
+
+import org.antlr.runtime.tree.CommonTree;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.dbcp.hive.Hive3DBCPService;
+import org.apache.nifi.expression.ExpressionLanguageScope;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.AbstractSessionFactoryProcessor;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.stream.io.StreamUtils;
+
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+import java.sql.Date;
+import java.sql.PreparedStatement;
+import java.sql.SQLDataException;
+import java.sql.SQLException;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * An abstract base class for HiveQL processors to share common data, methods, etc.
+ */
+public abstract class AbstractHive3QLProcessor extends AbstractSessionFactoryProcessor {
+
+ protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("hiveql\\.args\\.(\\d+)\\.type");
+ protected static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+");
+ static final String ATTR_INPUT_TABLES = "query.input.tables";
+ static final String ATTR_OUTPUT_TABLES = "query.output.tables";
+
+
+ public static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder()
+ .name("hive3-dbcp-service")
+ .displayName("Hive Database Connection Pooling Service")
+ .description("The Hive Controller Service that is used to obtain connection(s) to the Hive database")
+ .required(true)
+ .identifiesControllerService(Hive3DBCPService.class)
+ .build();
+
+ public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder()
+ .name("hive3-charset")
+ .displayName("Character Set")
+ .description("Specifies the character set of the record data.")
+ .required(true)
+ .defaultValue("UTF-8")
+ .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor QUERY_TIMEOUT = new PropertyDescriptor.Builder()
+ .name("hive3-query-timeout")
+ .displayName("Query timeout")
+ .description("Sets the number of seconds the driver will wait for a query to execute. "
+ + "A value of 0 means no timeout. NOTE: Non-zero values may not be supported by the driver.")
+ .defaultValue("0")
+ .required(true)
+ .addValidator(StandardValidators.INTEGER_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .build();
+
+ /**
+ * Determines the HiveQL statement that should be executed for the given FlowFile
+ *
+ * @param session the session that can be used to access the given FlowFile
+ * @param flowFile the FlowFile whose HiveQL statement should be executed
+ * @return the HiveQL that is associated with the given FlowFile
+ */
+ protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
+ // Read the HiveQL from the FlowFile's content
+ final byte[] buffer = new byte[(int) flowFile.getSize()];
+ session.read(flowFile, in -> StreamUtils.fillBuffer(in, buffer));
+
+ // The content of the FlowFile is the HiveQL statement(s) to execute
+ return new String(buffer, charset);
+ }
+
+ private static class ParameterHolder {
+ String attributeName;
+ int jdbcType;
+ String value;
+ }
+
+ /**
+ * Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes.
+ *
+ * @param base the one-based index, within the overall script, of the first parameter belonging to this statement
+ * @param stmt the statement to set the parameters on
+ * @param paramCount the number of parameters expected by this statement
+ * @param attributes the attributes from which to derive parameter indices, values, and types
+ * @return the base index to use for the next statement in the script
+ * @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called
+ */
+ protected int setParameters(int base, final PreparedStatement stmt, int paramCount, final Map<String, String> attributes) throws SQLException {
+
+ final Map<Integer, ParameterHolder> parmMap = new TreeMap<>();
+
+ for (final Map.Entry<String, String> entry : attributes.entrySet()) {
+ final String key = entry.getKey();
+ final Matcher matcher = HIVEQL_TYPE_ATTRIBUTE_PATTERN.matcher(key);
+ if (matcher.matches()) {
+ final int parameterIndex = Integer.parseInt(matcher.group(1));
+ if (parameterIndex >= base && parameterIndex < base + paramCount) {
+ final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches();
+ if (!isNumeric) {
+ throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC numeral jdbcType");
+ }
+
+ final String valueAttrName = "hiveql.args." + parameterIndex + ".value";
+
+ final ParameterHolder ph = new ParameterHolder();
+ final int realIndexLoc = parameterIndex - base + 1;
+
+ ph.jdbcType = Integer.parseInt(entry.getValue());
+ ph.value = attributes.get(valueAttrName);
+ ph.attributeName = valueAttrName;
+
+ parmMap.put(realIndexLoc, ph);
+
+ }
+ }
+ }
+
+
+ // Now that we've retrieved the correct number of parameters and they are sorted, set them.
+ for (final Map.Entry<Integer, ParameterHolder> entry : parmMap.entrySet()) {
+ final Integer index = entry.getKey();
+ final ParameterHolder ph = entry.getValue();
+
+ try {
+ setParameter(stmt, ph.attributeName, index, ph.value, ph.jdbcType);
+ } catch (final NumberFormatException nfe) {
+ throw new SQLDataException("The value of the " + ph.attributeName + " is '" + ph.value + "', which cannot be converted into the necessary data jdbcType", nfe);
+ }
+ }
+ return base + paramCount;
+ }
+
+ /**
+ * Determines how to map the given value to the appropriate JDBC data type and sets the parameter on the
+ * provided PreparedStatement
+ *
+ * @param stmt the PreparedStatement to set the parameter on
+ * @param attrName the name of the attribute that the parameter is coming from - for logging purposes
+ * @param parameterIndex the index of the HiveQL parameter to set
+ * @param parameterValue the value of the HiveQL parameter to set
+ * @param jdbcType the JDBC Type of the HiveQL parameter to set
+ * @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter
+ */
+ protected void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType) throws SQLException {
+ if (parameterValue == null) {
+ stmt.setNull(parameterIndex, jdbcType);
+ } else {
+ try {
+ switch (jdbcType) {
+ case Types.BIT:
+ case Types.BOOLEAN:
+ stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue));
+ break;
+ case Types.TINYINT:
+ stmt.setByte(parameterIndex, Byte.parseByte(parameterValue));
+ break;
+ case Types.SMALLINT:
+ stmt.setShort(parameterIndex, Short.parseShort(parameterValue));
+ break;
+ case Types.INTEGER:
+ stmt.setInt(parameterIndex, Integer.parseInt(parameterValue));
+ break;
+ case Types.BIGINT:
+ stmt.setLong(parameterIndex, Long.parseLong(parameterValue));
+ break;
+ case Types.REAL:
+ stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue));
+ break;
+ case Types.FLOAT:
+ case Types.DOUBLE:
+ stmt.setDouble(parameterIndex, Double.parseDouble(parameterValue));
+ break;
+ case Types.DECIMAL:
+ case Types.NUMERIC:
+ stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue));
+ break;
+ case Types.DATE:
+ stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue)));
+ break;
+ case Types.TIME:
+ stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue)));
+ break;
+ case Types.TIMESTAMP:
+ stmt.setTimestamp(parameterIndex, new Timestamp(Long.parseLong(parameterValue)));
+ break;
+ case Types.CHAR:
+ case Types.VARCHAR:
+ case Types.LONGNVARCHAR:
+ case Types.LONGVARCHAR:
+ stmt.setString(parameterIndex, parameterValue);
+ break;
+ default:
+ stmt.setObject(parameterIndex, parameterValue, jdbcType);
+ break;
+ }
+ } catch (SQLException e) {
+ // Log which attribute/parameter had an error, then rethrow to be handled at the top level
+ getLogger().error("Error setting parameter {} to value from {} ({})", new Object[]{parameterIndex, attrName, parameterValue}, e);
+ throw e;
+ }
+ }
+ }
+
+ protected static class TableName {
+ private final String database;
+ private final String table;
+ private final boolean input;
+
+ TableName(String database, String table, boolean input) {
+ this.database = database;
+ this.table = table;
+ this.input = input;
+ }
+
+ public String getDatabase() {
+ return database;
+ }
+
+ public String getTable() {
+ return table;
+ }
+
+ public boolean isInput() {
+ return input;
+ }
+
+ @Override
+ public String toString() {
+ return database == null || database.isEmpty() ? table : database + '.' + table;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ TableName tableName = (TableName) o;
+
+ if (input != tableName.input) return false;
+ if (database != null ? !database.equals(tableName.database) : tableName.database != null) return false;
+ return table.equals(tableName.table);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = database != null ? database.hashCode() : 0;
+ result = 31 * result + table.hashCode();
+ result = 31 * result + (input ? 1 : 0);
+ return result;
+ }
+ }
+
+ protected Set<TableName> findTableNames(final String query) {
+ final ASTNode node;
+ try {
+ node = new ParseDriver().parse(normalize(query));
+ } catch (ParseException e) {
+ // If the query cannot be parsed, just log a message and continue.
+ getLogger().debug("Failed to parse query: {} due to {}", new Object[]{query, e}, e);
+ return Collections.emptySet();
+ }
+ final HashSet<TableName> tableNames = new HashSet<>();
+ findTableNames(node, tableNames);
+ return tableNames;
+ }
+
+ /**
+ * Normalize a query.
+ * Hive resolves prepared statement parameters before executing a query,
+ * see {@link org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for details.
+ * HiveParser does not expect '?' to be in a query string, and throws an Exception if there is one.
+ * In this normalize method, '?' is replaced with 'x' to avoid that.
+ */
+ private String normalize(String query) {
+ return query.replace('?', 'x');
+ }
+
+ private void findTableNames(final Object obj, final Set<TableName> tableNames) {
+ if (!(obj instanceof CommonTree)) {
+ return;
+ }
+ final CommonTree tree = (CommonTree) obj;
+ final int childCount = tree.getChildCount();
+ if ("TOK_TABNAME".equals(tree.getText())) {
+ final TableName tableName;
+ // If the parent node is TOK_TABREF, then this is an input table.
+ final boolean isInput = "TOK_TABREF".equals(tree.getParent().getText());
+ switch (childCount) {
+ case 1 :
+ tableName = new TableName(null, tree.getChild(0).getText(), isInput);
+ break;
+ case 2:
+ tableName = new TableName(tree.getChild(0).getText(), tree.getChild(1).getText(), isInput);
+ break;
+ default:
+ throw new IllegalStateException("TOK_TABNAME does not have expected children, childCount=" + childCount);
+ }
+ tableNames.add(tableName);
+ return;
+ }
+ for (int i = 0; i < childCount; i++) {
+ findTableNames(tree.getChild(i), tableNames);
+ }
+ }
+
+ protected Map<String, String> toQueryTableAttributes(Set<TableName> tableNames) {
+ final Map<String, String> attributes = new HashMap<>();
+ for (TableName tableName : tableNames) {
+ final String attributeName = tableName.isInput() ? ATTR_INPUT_TABLES : ATTR_OUTPUT_TABLES;
+ if (attributes.containsKey(attributeName)) {
+ attributes.put(attributeName, attributes.get(attributeName) + "," + tableName);
+ } else {
+ attributes.put(attributeName, tableName.toString());
+ }
+ }
+ return attributes;
+ }
+}
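
For illustration, here is a minimal sketch (not part of this commit) of how the
hiveql.args.N.type / hiveql.args.N.value attribute convention consumed by
setParameters() above maps onto PreparedStatement setters; the attribute values
below are made-up examples:

    import java.sql.Types;
    import java.util.HashMap;
    import java.util.Map;

    class HiveQLArgsExample {
        // Hypothetical attributes for a statement with two '?' placeholders.
        static Map<String, String> exampleAttributes() {
            Map<String, String> attrs = new HashMap<>();
            attrs.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR)); // 12
            attrs.put("hiveql.args.1.value", "nifi");
            attrs.put("hiveql.args.2.type", String.valueOf(Types.INTEGER)); // 4
            attrs.put("hiveql.args.2.value", "42");
            return attrs;
        }
        // Given a PreparedStatement for "INSERT INTO t VALUES (?, ?)",
        // setParameters(1, stmt, 2, exampleAttributes()) would call
        // stmt.setString(1, "nifi") and stmt.setInt(2, 42), then return 3
        // as the base index for the next statement in the script.
    }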
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/PutHive3QL.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/PutHive3QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/PutHive3QL.java
new file mode 100644
index 0000000..989d085
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/PutHive3QL.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.hive;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
+import org.apache.nifi.annotation.behavior.ReadsAttribute;
+import org.apache.nifi.annotation.behavior.ReadsAttributes;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.dbcp.hive.Hive3DBCPService;
+import org.apache.nifi.expression.ExpressionLanguageScope;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessSessionFactory;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.processor.util.pattern.ErrorTypes;
+import org.apache.nifi.processor.util.pattern.ExceptionHandler;
+import org.apache.nifi.processor.util.pattern.ExceptionHandler.OnError;
+import org.apache.nifi.processor.util.pattern.PartialFunctions.FetchFlowFiles;
+import org.apache.nifi.processor.util.pattern.PartialFunctions.InitConnection;
+import org.apache.nifi.processor.util.pattern.Put;
+import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
+import org.apache.nifi.processor.util.pattern.RoutingResult;
+
+import java.nio.charset.Charset;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.SQLNonTransientException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+
+@SeeAlso(SelectHive3QL.class)
+@InputRequirement(Requirement.INPUT_REQUIRED)
+@Tags({"sql", "hive", "put", "database", "update", "insert"})
+@CapabilityDescription("Executes a HiveQL DDL/DML command (UPDATE, INSERT, e.g.). The content of an incoming FlowFile is expected to be the HiveQL command "
+ + "to execute. The HiveQL command may use the ? to escape parameters. In this case, the parameters to use must exist as FlowFile attributes "
+ + "with the naming convention hiveql.args.N.type and hiveql.args.N.value, where N is a positive integer. The hiveql.args.N.type is expected to be "
+ + "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.")
+@ReadsAttributes({
+ @ReadsAttribute(attribute = "hiveql.args.N.type", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The type of each Parameter is specified as an integer "
+ + "that represents the JDBC Type of the parameter."),
+ @ReadsAttribute(attribute = "hiveql.args.N.value", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The value of the Parameters are specified as "
+ + "hiveql.args.1.value, hiveql.args.2.value, hiveql.args.3.value, and so on. The type of the hiveql.args.1.value Parameter is specified by the hiveql.args.1.type attribute.")
+})
+@WritesAttributes({
+ @WritesAttribute(attribute = "query.input.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, "
+ + "and contains input table names (if any) in comma delimited 'databaseName.tableName' format."),
+ @WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, "
+ + "and contains the target table names in 'databaseName.tableName' format.")
+})
+public class PutHive3QL extends AbstractHive3QLProcessor {
+
+ public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
+ .name("hive-batch-size")
+ .displayName("Batch Size")
+ .description("The preferred number of FlowFiles to put to the database in a single transaction")
+ .required(true)
+ .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
+ .defaultValue("100")
+ .build();
+
+ public static final PropertyDescriptor STATEMENT_DELIMITER = new PropertyDescriptor.Builder()
+ .name("statement-delimiter")
+ .displayName("Statement Delimiter")
+ .description("Statement Delimiter used to separate SQL statements in a multiple statement script")
+ .required(true)
+ .defaultValue(";")
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.NONE)
+ .build();
+
+ public static final Relationship REL_SUCCESS = new Relationship.Builder()
+ .name("success")
+ .description("A FlowFile is routed to this relationship after the database is successfully updated")
+ .build();
+ public static final Relationship REL_RETRY = new Relationship.Builder()
+ .name("retry")
+ .description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed")
+ .build();
+ public static final Relationship REL_FAILURE = new Relationship.Builder()
+ .name("failure")
+ .description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, "
+ + "such as an invalid query or an integrity constraint violation")
+ .build();
+
+
+ private static final List<PropertyDescriptor> propertyDescriptors;
+ private static final Set<Relationship> relationships;
+
+ /*
+ * Ensures that the list of property descriptors and the set of relationships
+ * are built only once.
+ */
+ static {
+ List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
+ _propertyDescriptors.add(HIVE_DBCP_SERVICE);
+ _propertyDescriptors.add(BATCH_SIZE);
+ _propertyDescriptors.add(QUERY_TIMEOUT);
+ _propertyDescriptors.add(CHARSET);
+ _propertyDescriptors.add(STATEMENT_DELIMITER);
+ _propertyDescriptors.add(RollbackOnFailure.ROLLBACK_ON_FAILURE);
+ propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
+
+ Set<Relationship> _relationships = new HashSet<>();
+ _relationships.add(REL_SUCCESS);
+ _relationships.add(REL_FAILURE);
+ _relationships.add(REL_RETRY);
+ relationships = Collections.unmodifiableSet(_relationships);
+ }
+
+ private Put<FunctionContext, Connection> process;
+ private ExceptionHandler<FunctionContext> exceptionHandler;
+
+ @OnScheduled
+ public void constructProcess() {
+ exceptionHandler = new ExceptionHandler<>();
+ exceptionHandler.mapException(e -> {
+ if (e instanceof SQLNonTransientException) {
+ return ErrorTypes.InvalidInput;
+ } else if (e instanceof SQLException) {
+ return ErrorTypes.TemporalFailure;
+ } else {
+ return ErrorTypes.UnknownFailure;
+ }
+ });
+ exceptionHandler.adjustError(RollbackOnFailure.createAdjustError(getLogger()));
+
+ process = new Put<>();
+ process.setLogger(getLogger());
+ process.initConnection(initConnection);
+ process.fetchFlowFiles(fetchFlowFiles);
+ process.putFlowFile(putFlowFile);
+ process.adjustRoute(RollbackOnFailure.createAdjustRoute(REL_FAILURE, REL_RETRY));
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return propertyDescriptors;
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return relationships;
+ }
+
+ private class FunctionContext extends RollbackOnFailure {
+ final Charset charset;
+ final String statementDelimiter;
+ final long startNanos = System.nanoTime();
+
+ String connectionUrl;
+
+
+ private FunctionContext(boolean rollbackOnFailure, Charset charset, String statementDelimiter) {
+ super(rollbackOnFailure, false);
+ this.charset = charset;
+ this.statementDelimiter = statementDelimiter;
+ }
+ }
+
+ private InitConnection<FunctionContext, Connection> initConnection = (context, session, fc, ff) -> {
+ final Hive3DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive3DBCPService.class);
+ final Connection connection = dbcpService.getConnection();
+ fc.connectionUrl = dbcpService.getConnectionURL();
+ return connection;
+ };
+
+ private FetchFlowFiles<FunctionContext> fetchFlowFiles = (context, session, functionContext, result) -> {
+ final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
+ return session.get(batchSize);
+ };
+
+ private Put.PutFlowFile<FunctionContext, Connection> putFlowFile = (context, session, fc, conn, flowFile, result) -> {
+ final String script = getHiveQL(session, flowFile, fc.charset);
+ String regex = "(?<!\\\\)" + Pattern.quote(fc.statementDelimiter);
+
+ String[] hiveQLs = script.split(regex);
+
+ final Set<TableName> tableNames = new HashSet<>();
+ exceptionHandler.execute(fc, flowFile, input -> {
+ int loc = 1;
+ for (String hiveQLStr: hiveQLs) {
+ getLogger().debug("HiveQL: {}", new Object[]{hiveQLStr});
+
+ final String hiveQL = hiveQLStr.trim();
+ if (!StringUtils.isEmpty(hiveQL)) {
+ final PreparedStatement stmt = conn.prepareStatement(hiveQL);
+
+ // Get ParameterMetadata
+ // Hive JDBC Doesn't support this yet:
+ // ParameterMetaData pmd = stmt.getParameterMetaData();
+ // int paramCount = pmd.getParameterCount();
+ int paramCount = StringUtils.countMatches(hiveQL, "?");
+
+ if (paramCount > 0) {
+ loc = setParameters(loc, stmt, paramCount, flowFile.getAttributes());
+ }
+
+ // Parse hiveQL and extract input/output tables
+ try {
+ tableNames.addAll(findTableNames(hiveQL));
+ } catch (Exception e) {
+ // If the hiveQL cannot be parsed, just log a warning and continue.
+ getLogger().warn("Failed to parse hiveQL: {} due to {}", new Object[]{hiveQL, e}, e);
+ }
+
+ stmt.setQueryTimeout(context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(flowFile).asInteger());
+
+ // Execute the statement
+ stmt.execute();
+ fc.proceed();
+ }
+ }
+
+ // Emit a Provenance SEND event
+ final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - fc.startNanos);
+
+ final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, toQueryTableAttributes(tableNames));
+ session.getProvenanceReporter().send(updatedFlowFile, fc.connectionUrl, transmissionMillis, true);
+ result.routeTo(updatedFlowFile, REL_SUCCESS);
+
+ }, onFlowFileError(context, session, result));
+
+ };
+
+ private OnError<FunctionContext, FlowFile> onFlowFileError(final ProcessContext context, final ProcessSession session, final RoutingResult result) {
+ OnError<FunctionContext, FlowFile> onFlowFileError = ExceptionHandler.createOnError(context, session, result, REL_FAILURE, REL_RETRY);
+ onFlowFileError = onFlowFileError.andThen((c, i, r, e) -> {
+ switch (r.destination()) {
+ case Failure:
+ getLogger().error("Failed to update Hive for {} due to {}; routing to failure", new Object[] {i, e}, e);
+ break;
+ case Retry:
+ getLogger().error("Failed to update Hive for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry",
+ new Object[] {i, e}, e);
+ break;
+ }
+ });
+ return RollbackOnFailure.createOnError(onFlowFileError);
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
+ final Boolean rollbackOnFailure = context.getProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE).asBoolean();
+ final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
+ final String statementDelimiter = context.getProperty(STATEMENT_DELIMITER).getValue();
+ final FunctionContext functionContext = new FunctionContext(rollbackOnFailure, charset, statementDelimiter);
+ RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> process.onTrigger(context, session, functionContext));
+ }
+}
\ No newline at end of file
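
A hedged usage sketch for the processor above, assuming the standard nifi-mock
TestRunner API; the controller service id "dbcp" is a hypothetical placeholder,
and registering a Hive3DBCPService stub is elided:

    import java.nio.charset.StandardCharsets;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.nifi.util.TestRunner;
    import org.apache.nifi.util.TestRunners;

    class PutHive3QLSketch {
        void run() {
            TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
            // A Hive3DBCPService stub would need to be added via
            // runner.addControllerService(...) and enabled before this
            // property can resolve the hypothetical "dbcp" id.
            runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");

            Map<String, String> attrs = new HashMap<>();
            attrs.put("hiveql.args.1.type", "4");   // java.sql.Types.INTEGER
            attrs.put("hiveql.args.1.value", "1");
            attrs.put("hiveql.args.2.type", "12");  // java.sql.Types.VARCHAR
            attrs.put("hiveql.args.2.value", "mark");

            runner.enqueue("INSERT INTO users (id, name) VALUES (?, ?)".getBytes(StandardCharsets.UTF_8), attrs);
            runner.run();
            runner.assertAllFlowFilesTransferred(PutHive3QL.REL_SUCCESS, 1);
        }
    }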
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/PutHive3Streaming.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/PutHive3Streaming.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/PutHive3Streaming.java
new file mode 100644
index 0000000..664915c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/PutHive3Streaming.java
@@ -0,0 +1,560 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.hive;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hive.common.util.ShutdownHookManager;
+import org.apache.hive.streaming.ConnectionError;
+import org.apache.hive.streaming.HiveStreamingConnection;
+import org.apache.hive.streaming.InvalidTable;
+import org.apache.hive.streaming.SerializationError;
+import org.apache.hive.streaming.StreamingConnection;
+import org.apache.hive.streaming.StreamingException;
+import org.apache.hive.streaming.StreamingIOFailure;
+import org.apache.hive.streaming.TransactionError;
+import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.annotation.lifecycle.OnStopped;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.expression.ExpressionLanguageScope;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.hadoop.SecurityUtil;
+import org.apache.nifi.kerberos.KerberosCredentialsService;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.processor.util.pattern.DiscontinuedException;
+import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
+import org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.util.StringUtils;
+import org.apache.nifi.util.hive.AuthenticationFailedException;
+import org.apache.nifi.util.hive.HiveConfigurator;
+import org.apache.nifi.util.hive.HiveOptions;
+import org.apache.hive.streaming.HiveRecordWriter;
+import org.apache.nifi.util.hive.HiveUtils;
+import org.apache.nifi.util.hive.ValidationResources;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import static org.apache.nifi.processors.hive.AbstractHive3QLProcessor.ATTR_OUTPUT_TABLES;
+
+@Tags({"hive", "streaming", "put", "database", "store"})
+@CapabilityDescription("This processor uses Hive Streaming to send flow file records to an Apache Hive 3.0+ table. "
+ + "The partition values are expected to be the 'last' fields of each record, so if the table is partitioned on column A for example, then the last field in "
+ + "each record should be field A.")
+@WritesAttributes({
+ @WritesAttribute(attribute = "hivestreaming.record.count", description = "This attribute is written on the flow files routed to the 'success' "
+ + "and 'failure' relationships, and contains the number of records from the incoming flow file. All records in a flow file are committed as a single transaction."),
+ @WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' "
+ + "and 'failure' relationships, and contains the target table name in 'databaseName.tableName' format.")
+})
+@RequiresInstanceClassLoading
+public class PutHive3Streaming extends AbstractProcessor {
+ // Attributes
+ public static final String HIVE_STREAMING_RECORD_COUNT_ATTR = "hivestreaming.record.count";
+
+ private static final String CLIENT_CACHE_DISABLED_PROPERTY = "hcatalog.hive.client.cache.disabled";
+
+ // Properties
+ static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder()
+ .name("record-reader")
+ .displayName("Record Reader")
+ .description("The service for reading records from incoming flow files.")
+ .identifiesControllerService(RecordReaderFactory.class)
+ .required(true)
+ .build();
+
+ static final PropertyDescriptor METASTORE_URI = new PropertyDescriptor.Builder()
+ .name("hive3-stream-metastore-uri")
+ .displayName("Hive Metastore URI")
+ .description("The URI location for the Hive Metastore. Note that this is not the location of the Hive Server. The default port for the "
+ + "Hive metastore is 9043.")
+ .required(true)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .addValidator(StandardValidators.URI_VALIDATOR)
+ .addValidator(StandardValidators.createRegexMatchingValidator(Pattern.compile("(^[^/]+.*[^/]+$|^[^/]+$|^$)"))) // must not start or end with '/'
+ .build();
+
+ static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder()
+ .name("hive3-config-resources")
+ .displayName("Hive Configuration Resources")
+ .description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop "
+ + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication "
+ + "with Kerberos e.g., the appropriate properties must be set in the configuration files. Also note that if Max Concurrent Tasks is set "
+ + "to a number greater than one, the 'hcatalog.hive.client.cache.disabled' property will be forced to 'true' to avoid concurrency issues. "
+ + "Please see the Hive documentation for more details.")
+ .required(false)
+ .addValidator(HiveUtils.createMultipleFilesExistValidator())
+ .build();
+
+ static final PropertyDescriptor DB_NAME = new PropertyDescriptor.Builder()
+ .name("hive3-stream-database-name")
+ .displayName("Database Name")
+ .description("The name of the database in which to put the data.")
+ .required(true)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
+ .name("hive3-stream-table-name")
+ .displayName("Table Name")
+ .description("The name of the database table in which to put the data.")
+ .required(true)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor PARTITION_VALUES = new PropertyDescriptor.Builder()
+ .name("hive3-stream-part-vals")
+ .displayName("Partition Values")
+ .description("Specifies a comma-separated list of the values for the partition columns of the target table. If the incoming records all have the same values "
+ + "for the partition columns, those values can be entered here, resulting in a performance gain. If specified, this property will often contain "
+ + "Expression Language, for example if PartitionRecord is upstream and two partitions 'name' and 'age' are used, then this property can be set to "
+ + "${name},${age}.")
+ .required(false)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor AUTOCREATE_PARTITIONS = new PropertyDescriptor.Builder()
+ .name("hive3-stream-autocreate-partition")
+ .displayName("Auto-Create Partitions")
+ .description("Flag indicating whether partitions should be automatically created")
+ .required(true)
+ .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
+ .allowableValues("true", "false")
+ .defaultValue("true")
+ .build();
+
+ static final PropertyDescriptor CALL_TIMEOUT = new PropertyDescriptor.Builder()
+ .name("hive3-stream-call-timeout")
+ .displayName("Call Timeout")
+ .description("The number of seconds allowed for a Hive Streaming operation to complete. A value of 0 indicates the processor should wait indefinitely on operations. "
+ + "Note that although this property supports Expression Language, it will not be evaluated against incoming FlowFile attributes.")
+ .defaultValue("0")
+ .required(true)
+ .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .build();
+
+ static final PropertyDescriptor DISABLE_STREAMING_OPTIMIZATIONS = new PropertyDescriptor.Builder()
+ .name("hive3-stream-disable-optimizations")
+ .displayName("Disable Streaming Optimizations")
+ .description("Whether to disable streaming optimizations. Disabling streaming optimizations will have significant impact to performance and memory consumption.")
+ .required(true)
+ .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
+ .allowableValues("true", "false")
+ .defaultValue("false")
+ .build();
+
+
+ static final PropertyDescriptor ROLLBACK_ON_FAILURE = RollbackOnFailure.createRollbackOnFailureProperty(
+ "NOTE: When an error occurred after a Hive streaming transaction which is derived from the same input FlowFile is already committed," +
+ " (i.e. a FlowFile contains more records than 'Records per Transaction' and a failure occurred at the 2nd transaction or later)" +
+ " then the succeeded records will be transferred to 'success' relationship while the original input FlowFile stays in incoming queue." +
+ " Duplicated records can be created for the succeeded ones when the same FlowFile is processed again.");
+
+ static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder()
+ .name("kerberos-credentials-service")
+ .displayName("Kerberos Credentials Service")
+ .description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos")
+ .identifiesControllerService(KerberosCredentialsService.class)
+ .required(false)
+ .build();
+
+ // Relationships
+ public static final Relationship REL_SUCCESS = new Relationship.Builder()
+ .name("success")
+ .description("A FlowFile containing Avro records routed to this relationship after the record has been successfully transmitted to Hive.")
+ .build();
+
+ public static final Relationship REL_FAILURE = new Relationship.Builder()
+ .name("failure")
+ .description("A FlowFile containing Avro records routed to this relationship if the record could not be transmitted to Hive.")
+ .build();
+
+ public static final Relationship REL_RETRY = new Relationship.Builder()
+ .name("retry")
+ .description("The incoming FlowFile is routed to this relationship if its records cannot be transmitted to Hive. Note that "
+ + "some records may have been processed successfully, they will be routed (as Avro flow files) to the success relationship. "
+ + "The combination of the retry, success, and failure relationships indicate how many records succeeded and/or failed. This "
+ + "can be used to provide a retry capability since full rollback is not possible.")
+ .build();
+
+ private List<PropertyDescriptor> propertyDescriptors;
+ private Set<Relationship> relationships;
+
+ protected volatile HiveConfigurator hiveConfigurator = new HiveConfigurator();
+ protected volatile UserGroupInformation ugi;
+ protected volatile HiveConf hiveConfig;
+
+ protected volatile int callTimeout;
+ protected ExecutorService callTimeoutPool;
+ protected volatile boolean rollbackOnFailure;
+
+ // Holder of cached Configuration information so validation does not reload the same config over and over
+ private final AtomicReference<ValidationResources> validationResourceHolder = new AtomicReference<>();
+
+ @Override
+ protected void init(ProcessorInitializationContext context) {
+ List<PropertyDescriptor> props = new ArrayList<>();
+ props.add(RECORD_READER);
+ props.add(METASTORE_URI);
+ props.add(HIVE_CONFIGURATION_RESOURCES);
+ props.add(DB_NAME);
+ props.add(TABLE_NAME);
+ props.add(PARTITION_VALUES);
+ props.add(AUTOCREATE_PARTITIONS);
+ props.add(CALL_TIMEOUT);
+ props.add(DISABLE_STREAMING_OPTIMIZATIONS);
+ props.add(ROLLBACK_ON_FAILURE);
+ props.add(KERBEROS_CREDENTIALS_SERVICE);
+
+ propertyDescriptors = Collections.unmodifiableList(props);
+
+ Set<Relationship> _relationships = new HashSet<>();
+ _relationships.add(REL_SUCCESS);
+ _relationships.add(REL_FAILURE);
+ _relationships.add(REL_RETRY);
+ relationships = Collections.unmodifiableSet(_relationships);
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return propertyDescriptors;
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return relationships;
+ }
+
+ @Override
+ protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
+ boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet();
+
+ final List<ValidationResult> problems = new ArrayList<>();
+
+ final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
+
+ final String resolvedPrincipal = credentialsService != null ? credentialsService.getPrincipal() : null;
+ final String resolvedKeytab = credentialsService != null ? credentialsService.getKeytab() : null;
+ if (confFileProvided) {
+ final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
+ problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, validationResourceHolder, getLogger()));
+ }
+
+ return problems;
+ }
+
+ @OnScheduled
+ public void setup(final ProcessContext context) {
+ ComponentLog log = getLogger();
+ rollbackOnFailure = context.getProperty(ROLLBACK_ON_FAILURE).asBoolean();
+
+ final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).getValue();
+ hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles);
+
+ // If more than one concurrent task, force 'hcatalog.hive.client.cache.disabled' to true
+ if (context.getMaxConcurrentTasks() > 1) {
+ hiveConfig.setBoolean(CLIENT_CACHE_DISABLED_PROPERTY, true);
+ }
+
+ // add any dynamic properties to the Hive configuration
+ for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
+ final PropertyDescriptor descriptor = entry.getKey();
+ if (descriptor.isDynamic()) {
+ hiveConfig.set(descriptor.getName(), entry.getValue());
+ }
+ }
+
+ hiveConfigurator.preload(hiveConfig);
+
+ if (SecurityUtil.isSecurityEnabled(hiveConfig)) {
+ final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
+
+ final String resolvedPrincipal = credentialsService.getPrincipal();
+ final String resolvedKeytab = credentialsService.getKeytab();
+
+ log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[]{resolvedPrincipal, resolvedKeytab});
+ try {
+ ugi = hiveConfigurator.authenticate(hiveConfig, resolvedPrincipal, resolvedKeytab);
+ } catch (AuthenticationFailedException ae) {
+ throw new ProcessException("Kerberos authentication failed for Hive Streaming", ae);
+ }
+
+ log.info("Successfully logged in as principal {} with keytab {}", new Object[]{resolvedPrincipal, resolvedKeytab});
+ } else {
+ ugi = null;
+ }
+
+ callTimeout = context.getProperty(CALL_TIMEOUT).evaluateAttributeExpressions().asInteger() * 1000; // milliseconds
+ String timeoutName = "put-hive3-streaming-%d";
+ this.callTimeoutPool = Executors.newFixedThreadPool(1,
+ new ThreadFactoryBuilder().setNameFormat(timeoutName).build());
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
+ FlowFile flowFile = session.get();
+ if (flowFile == null) {
+ return;
+ }
+
+ final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
+ final String dbName = context.getProperty(DB_NAME).evaluateAttributeExpressions(flowFile).getValue();
+ final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
+
+ final ComponentLog log = getLogger();
+ final String metastoreUri = context.getProperty(METASTORE_URI).evaluateAttributeExpressions(flowFile).getValue();
+
+ final String partitionValuesString = context.getProperty(PARTITION_VALUES).evaluateAttributeExpressions(flowFile).getValue();
+ final boolean autoCreatePartitions = context.getProperty(AUTOCREATE_PARTITIONS).asBoolean();
+ final boolean disableStreamingOptimizations = context.getProperty(DISABLE_STREAMING_OPTIMIZATIONS).asBoolean();
+
+ HiveOptions o = new HiveOptions(metastoreUri, dbName, tableName)
+ .withHiveConf(hiveConfig)
+ .withAutoCreatePartitions(autoCreatePartitions)
+ .withCallTimeout(callTimeout)
+ .withStreamingOptimizations(!disableStreamingOptimizations);
+
+ if (!StringUtils.isEmpty(partitionValuesString)) {
+ List<String> staticPartitionValues = Arrays.stream(partitionValuesString.split(",")).filter(Objects::nonNull).map(String::trim).collect(Collectors.toList());
+ o = o.withStaticPartitionValues(staticPartitionValues);
+ }
+
+ if (SecurityUtil.isSecurityEnabled(hiveConfig)) {
+ final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
+ o = o.withKerberosPrincipal(credentialsService.getPrincipal()).withKerberosKeytab(credentialsService.getKeytab());
+ }
+
+ final HiveOptions options = o;
+
+ // Store the original class loader, then explicitly set it to this class's classloader (for use by the Hive Metastore)
+ ClassLoader originalClassloader = Thread.currentThread().getContextClassLoader();
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ StreamingConnection hiveStreamingConnection = null;
+
+ try (final InputStream rawIn = session.read(flowFile)) {
+ final RecordReader reader;
+
+ try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
+
+ // if we fail to create the RecordReader then we want to route to failure, so we need to
+ // handle this separately from the other IOExceptions which normally route to retry
+ try {
+ reader = recordReaderFactory.createRecordReader(flowFile, in, getLogger());
+ } catch (Exception e) {
+ throw new RecordReaderFactoryException("Unable to create RecordReader", e);
+ }
+
+ hiveStreamingConnection = makeStreamingConnection(options, reader);
+ // Add shutdown handler with higher priority than FileSystem shutdown hook so that streaming connection gets closed first before
+ // filesystem close (to avoid ClosedChannelException)
+ ShutdownHookManager.addShutdownHook(hiveStreamingConnection::close, FileSystem.SHUTDOWN_HOOK_PRIORITY + 1);
+
+ // Write records to Hive streaming, then commit and close
+ hiveStreamingConnection.beginTransaction();
+ hiveStreamingConnection.write(in);
+ hiveStreamingConnection.commitTransaction();
+ rawIn.close();
+
+ Map<String, String> updateAttributes = new HashMap<>();
+ updateAttributes.put(HIVE_STREAMING_RECORD_COUNT_ATTR, Long.toString(hiveStreamingConnection.getConnectionStats().getRecordsWritten()));
+ updateAttributes.put(ATTR_OUTPUT_TABLES, options.getQualifiedTableName());
+ flowFile = session.putAllAttributes(flowFile, updateAttributes);
+ session.getProvenanceReporter().send(flowFile, hiveStreamingConnection.getMetastoreUri());
+ session.transfer(flowFile, REL_SUCCESS);
+ } catch (TransactionError te) {
+ if (rollbackOnFailure) {
+ throw new ProcessException(te.getLocalizedMessage(), te);
+ } else {
+ throw new ShouldRetryException(te.getLocalizedMessage(), te);
+ }
+ } catch (RecordReaderFactoryException rrfe) {
+ throw new ProcessException(rrfe);
+ }
+ } catch (InvalidTable | SerializationError | StreamingIOFailure | IOException e) {
+ if (rollbackOnFailure) {
+ if (hiveStreamingConnection != null) {
+ abortConnection(hiveStreamingConnection);
+ }
+ throw new ProcessException(e.getLocalizedMessage(), e);
+ } else {
+ Map<String, String> updateAttributes = new HashMap<>();
+ // Guard against a null connection; the failure may have occurred before the streaming connection was created
+ if (hiveStreamingConnection != null) {
+ updateAttributes.put(HIVE_STREAMING_RECORD_COUNT_ATTR, Long.toString(hiveStreamingConnection.getConnectionStats().getRecordsWritten()));
+ }
+ updateAttributes.put(ATTR_OUTPUT_TABLES, options.getQualifiedTableName());
+ flowFile = session.putAllAttributes(flowFile, updateAttributes);
+ session.transfer(flowFile, REL_FAILURE);
+ }
+ } catch (DiscontinuedException e) {
+ // The input FlowFile processing is discontinued. Keep it in the input queue.
+ getLogger().warn("Discontinued processing for {} due to {}", new Object[]{flowFile, e}, e);
+ session.transfer(flowFile, Relationship.SELF);
+ } catch (ConnectionError ce) {
+ // If we can't connect to the metastore, yield the processor
+ context.yield();
+ throw new ProcessException("A connection to metastore cannot be established", ce);
+ } catch (ShouldRetryException e) {
+ // This exception is already a result of adjusting an error, so simply transfer the FlowFile to retry. Still need to abort the txn
+ getLogger().error(e.getLocalizedMessage(), e);
+ if (hiveStreamingConnection != null) {
+ abortConnection(hiveStreamingConnection);
+ }
+ flowFile = session.penalize(flowFile);
+ session.transfer(flowFile, REL_RETRY);
+ } catch (StreamingException se) {
+ // Handle all other exceptions. These are often record-based exceptions (since Hive will throw a subclass of the exception caught above)
+ Throwable cause = se.getCause();
+ if (cause == null) cause = se;
+ // This is a failure on the incoming data, rollback on failure if specified; otherwise route to failure after penalizing (and abort txn in any case)
+ if (rollbackOnFailure) {
+ if (hiveStreamingConnection != null) {
+ abortConnection(hiveStreamingConnection);
+ }
+ throw new ProcessException(cause.getLocalizedMessage(), cause);
+ } else {
+ flowFile = session.penalize(flowFile);
+ Map<String, String> updateAttributes = new HashMap<>();
+ // Guard against a null connection; the StreamingException may have been thrown while creating the connection
+ if (hiveStreamingConnection != null) {
+ updateAttributes.put(HIVE_STREAMING_RECORD_COUNT_ATTR, Long.toString(hiveStreamingConnection.getConnectionStats().getRecordsWritten()));
+ }
+ updateAttributes.put(ATTR_OUTPUT_TABLES, options.getQualifiedTableName());
+ flowFile = session.putAllAttributes(flowFile, updateAttributes);
+ session.transfer(flowFile, REL_FAILURE);
+ }
+
+ } catch (Throwable t) {
+ if (hiveStreamingConnection != null) {
+ abortConnection(hiveStreamingConnection);
+ }
+ throw (t instanceof ProcessException) ? (ProcessException) t : new ProcessException(t);
+ } finally {
+ closeConnection(hiveStreamingConnection);
+ // Restore original class loader, might not be necessary but is good practice since the processor task changed it
+ Thread.currentThread().setContextClassLoader(originalClassloader);
+ }
+ }
+
+ StreamingConnection makeStreamingConnection(HiveOptions options, RecordReader reader) throws StreamingException {
+ return HiveStreamingConnection.newBuilder()
+ .withDatabase(options.getDatabaseName())
+ .withTable(options.getTableName())
+ .withStaticPartitionValues(options.getStaticPartitionValues())
+ .withHiveConf(options.getHiveConf())
+ .withRecordWriter(new HiveRecordWriter(reader, getLogger()))
+ .withAgentInfo("NiFi " + this.getClass().getSimpleName() + " [" + this.getIdentifier()
+ + "] thread " + Thread.currentThread().getId() + "[" + Thread.currentThread().getName() + "]")
+ .connect();
+ }
+
+ @OnStopped
+ public void cleanup() {
+ validationResourceHolder.set(null); // trigger re-validation of resources
+
+ ComponentLog log = getLogger();
+
+ if (callTimeoutPool != null) {
+ callTimeoutPool.shutdown();
+ try {
+ while (!callTimeoutPool.isTerminated()) {
+ callTimeoutPool.awaitTermination(callTimeout, TimeUnit.MILLISECONDS);
+ }
+ } catch (Throwable t) {
+ log.warn("shutdown interrupted on " + callTimeoutPool, t);
+ }
+ callTimeoutPool = null;
+ }
+
+ ugi = null;
+ }
+
+ private void abortAndCloseConnection(StreamingConnection connection) {
+ try {
+ abortConnection(connection);
+ closeConnection(connection);
+ } catch (Exception ie) {
+ getLogger().warn("unable to close hive connections. ", ie);
+ }
+ }
+
+ /**
+ * Abort current Txn on the connection
+ */
+ private void abortConnection(StreamingConnection connection) {
+ if (connection != null) {
+ try {
+ connection.abortTransaction();
+ } catch (Exception e) {
+ getLogger().error("Failed to abort Hive Streaming transaction " + connection + " due to exception ", e);
+ }
+ }
+ }
+
+ /**
+ * Close the streaming connection
+ */
+ private void closeConnection(StreamingConnection connection) {
+ if (connection != null) {
+ try {
+ connection.close();
+ } catch (Exception e) {
+ getLogger().error("Failed to close Hive Streaming connection " + connection + " due to exception ", e);
+ }
+ }
+ }
+
+ private static class ShouldRetryException extends RuntimeException {
+ private ShouldRetryException(String message, Throwable cause) {
+ super(message, cause);
+ }
+ }
+}
+
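
The catch blocks above are the defensive error handling this commit adds. As a
condensed sketch (an illustrative paraphrase of the onTrigger() logic above,
not code from the commit), the routing decisions are roughly:

    import java.io.IOException;

    import org.apache.hive.streaming.ConnectionError;
    import org.apache.hive.streaming.InvalidTable;
    import org.apache.hive.streaming.SerializationError;
    import org.apache.hive.streaming.StreamingException;
    import org.apache.hive.streaming.StreamingIOFailure;
    import org.apache.hive.streaming.TransactionError;
    import org.apache.nifi.processor.util.pattern.DiscontinuedException;

    class RoutingSketch {
        // Where a failed FlowFile ends up, per the catch blocks in onTrigger().
        static String routeFor(Throwable t, boolean rollbackOnFailure) {
            if (t instanceof ConnectionError) {
                return "rollback: ProcessException after yielding the processor";
            }
            if (t instanceof TransactionError) {
                return rollbackOnFailure ? "rollback: ProcessException" : "retry (penalized, txn aborted)";
            }
            if (t instanceof InvalidTable || t instanceof SerializationError
                    || t instanceof StreamingIOFailure || t instanceof IOException) {
                return rollbackOnFailure ? "rollback: ProcessException, txn aborted" : "failure";
            }
            if (t instanceof DiscontinuedException) {
                return "kept in the incoming queue (Relationship.SELF)";
            }
            if (t instanceof StreamingException) {
                return rollbackOnFailure ? "rollback: ProcessException, txn aborted" : "failure (penalized)";
            }
            return "rollback: ProcessException, txn aborted";
        }
    }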
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive3QL.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive3QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive3QL.java
new file mode 100644
index 0000000..cb0b000
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive3QL.java
@@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.hive;
+
+import java.nio.charset.Charset;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.dbcp.hive.Hive3DBCPService;
+import org.apache.nifi.expression.ExpressionLanguageScope;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessSessionFactory;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.processor.util.pattern.PartialFunctions;
+import org.apache.nifi.util.StopWatch;
+import org.apache.nifi.util.hive.CsvOutputOptions;
+import org.apache.nifi.util.hive.HiveJdbcCommon;
+
+import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO;
+import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV;
+import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE;
+import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY;
+import static org.apache.nifi.util.hive.HiveJdbcCommon.NORMALIZE_NAMES_FOR_AVRO;
+
+@EventDriven
+@InputRequirement(Requirement.INPUT_ALLOWED)
+@Tags({"hive", "sql", "select", "jdbc", "query", "database"})
+@CapabilityDescription("Execute provided HiveQL SELECT query against a Hive database connection. Query result will be converted to Avro or CSV format."
+ + " Streaming is used so arbitrarily large result sets are supported. This processor can be scheduled to run on "
+ + "a timer, or cron expression, using the standard scheduling methods, or it can be triggered by an incoming FlowFile. "
+ + "If it is triggered by an incoming FlowFile, then attributes of that FlowFile will be available when evaluating the "
+ + "select query. FlowFile attribute 'selecthiveql.row.count' indicates how many rows were selected.")
+@WritesAttributes({
+ @WritesAttribute(attribute = "mime.type", description = "Sets the MIME type for the outgoing flowfile to application/avro-binary for Avro or text/csv for CSV."),
+ @WritesAttribute(attribute = "filename", description = "Adds .avro or .csv to the filename attribute depending on which output format is selected."),
+ @WritesAttribute(attribute = "selecthiveql.row.count", description = "Indicates how many rows were selected/returned by the query."),
+ @WritesAttribute(attribute = "fragment.identifier", description = "If 'Max Rows Per Flow File' is set then all FlowFiles from the same query result set "
+ + "will have the same value for the fragment.identifier attribute. This can then be used to correlate the results."),
+ @WritesAttribute(attribute = "fragment.count", description = "If 'Max Rows Per Flow File' is set then this is the total number of "
+ + "FlowFiles produced by a single ResultSet. This can be used in conjunction with the "
+ + "fragment.identifier attribute in order to know how many FlowFiles belonged to the same incoming ResultSet."),
+ @WritesAttribute(attribute = "fragment.index", description = "If 'Max Rows Per Flow File' is set then the position of this FlowFile in the list of "
+ + "outgoing FlowFiles that were all derived from the same result set FlowFile. This can be "
+ + "used in conjunction with the fragment.identifier attribute to know which FlowFiles originated from the same query result set and in what order "
+ + "FlowFiles were produced"),
+ @WritesAttribute(attribute = "query.input.tables", description = "Contains input table names in comma delimited 'databaseName.tableName' format.")
+})
+public class SelectHive3QL extends AbstractHive3QLProcessor {
+
+ static final String RESULT_ROW_COUNT = "selecthiveql.row.count";
+
+ // Relationships
+ static final Relationship REL_SUCCESS = new Relationship.Builder()
+ .name("success")
+ .description("Successfully created FlowFile from HiveQL query result set.")
+ .build();
+ static final Relationship REL_FAILURE = new Relationship.Builder()
+ .name("failure")
+ .description("HiveQL query execution failed. Incoming FlowFile will be penalized and routed to this relationship")
+ .build();
+
+
+ static final PropertyDescriptor HIVEQL_SELECT_QUERY = new PropertyDescriptor.Builder()
+ .name("hive-query")
+ .displayName("HiveQL Select Query")
+ .description("HiveQL SELECT query to execute. If this is not set, the query is assumed to be in the content of an incoming FlowFile.")
+ .required(false)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .build();
+
+ static final PropertyDescriptor FETCH_SIZE = new PropertyDescriptor.Builder()
+ .name("hive-fetch-size")
+ .displayName("Fetch Size")
+ .description("The number of result rows to be fetched from the result set at a time. This is a hint to the driver and may not be "
+ + "honored and/or exact. If the value specified is zero, then the hint is ignored.")
+ .defaultValue("0")
+ .required(true)
+ .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .build();
+
+ static final PropertyDescriptor MAX_ROWS_PER_FLOW_FILE = new PropertyDescriptor.Builder()
+ .name("hive-max-rows")
+ .displayName("Max Rows Per Flow File")
+ .description("The maximum number of result rows that will be included in a single FlowFile. " +
+ "This will allow you to break up very large result sets into multiple FlowFiles. If the value specified is zero, then all rows are returned in a single FlowFile.")
+ .defaultValue("0")
+ .required(true)
+ .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .build();
+
+ static final PropertyDescriptor MAX_FRAGMENTS = new PropertyDescriptor.Builder()
+ .name("hive-max-frags")
+ .displayName("Maximum Number of Fragments")
+ .description("The maximum number of fragments. If the value specified is zero, then all fragments are returned. " +
+ "This prevents OutOfMemoryError when this processor ingests huge table.")
+ .defaultValue("0")
+ .required(true)
+ .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .build();
+
+ static final PropertyDescriptor HIVEQL_CSV_HEADER = new PropertyDescriptor.Builder()
+ .name("csv-header")
+ .displayName("CSV Header")
+ .description("Include Header in Output")
+ .required(true)
+ .allowableValues("true", "false")
+ .defaultValue("true")
+ .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor HIVEQL_CSV_ALT_HEADER = new PropertyDescriptor.Builder()
+ .name("csv-alt-header")
+ .displayName("Alternate CSV Header")
+ .description("Comma separated list of header fields")
+ .required(false)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .build();
+
+ static final PropertyDescriptor HIVEQL_CSV_DELIMITER = new PropertyDescriptor.Builder()
+ .name("csv-delimiter")
+ .displayName("CSV Delimiter")
+ .description("CSV Delimiter used to separate fields")
+ .required(true)
+ .defaultValue(",")
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .build();
+
+ static final PropertyDescriptor HIVEQL_CSV_QUOTE = new PropertyDescriptor.Builder()
+ .name("csv-quote")
+ .displayName("CSV Quote")
+ .description("Whether to force quoting of CSV fields. Note that this might conflict with the setting for CSV Escape.")
+ .required(true)
+ .allowableValues("true", "false")
+ .defaultValue("true")
+ .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
+ .build();
+ static final PropertyDescriptor HIVEQL_CSV_ESCAPE = new PropertyDescriptor.Builder()
+ .name("csv-escape")
+ .displayName("CSV Escape")
+ .description("Whether to escape CSV strings in output. Note that this might conflict with the setting for CSV Quote.")
+ .required(true)
+ .allowableValues("true", "false")
+ .defaultValue("true")
+ .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor HIVEQL_OUTPUT_FORMAT = new PropertyDescriptor.Builder()
+ .name("hive-output-format")
+ .displayName("Output Format")
+ .description("How to represent the records coming from Hive (Avro, CSV, e.g.)")
+ .required(true)
+ .allowableValues(AVRO, CSV)
+ .defaultValue(AVRO)
+ .expressionLanguageSupported(ExpressionLanguageScope.NONE)
+ .build();
+
+ private final static List<PropertyDescriptor> propertyDescriptors;
+ private final static Set<Relationship> relationships;
+
+ /*
+ * Ensures that the list of property descriptors is built only once.
+ * Also creates the Set of relationships.
+ */
+ static {
+ List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
+ _propertyDescriptors.add(HIVE_DBCP_SERVICE);
+ _propertyDescriptors.add(HIVEQL_SELECT_QUERY);
+ _propertyDescriptors.add(FETCH_SIZE);
+ _propertyDescriptors.add(QUERY_TIMEOUT);
+ _propertyDescriptors.add(MAX_ROWS_PER_FLOW_FILE);
+ _propertyDescriptors.add(MAX_FRAGMENTS);
+ _propertyDescriptors.add(HIVEQL_OUTPUT_FORMAT);
+ _propertyDescriptors.add(NORMALIZE_NAMES_FOR_AVRO);
+ _propertyDescriptors.add(HIVEQL_CSV_HEADER);
+ _propertyDescriptors.add(HIVEQL_CSV_ALT_HEADER);
+ _propertyDescriptors.add(HIVEQL_CSV_DELIMITER);
+ _propertyDescriptors.add(HIVEQL_CSV_QUOTE);
+ _propertyDescriptors.add(HIVEQL_CSV_ESCAPE);
+ _propertyDescriptors.add(CHARSET);
+ propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
+
+ Set<Relationship> _relationships = new HashSet<>();
+ _relationships.add(REL_SUCCESS);
+ _relationships.add(REL_FAILURE);
+ relationships = Collections.unmodifiableSet(_relationships);
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return propertyDescriptors;
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return relationships;
+ }
+
+ @OnScheduled
+ public void setup(ProcessContext context) {
+ // If the query is not set, an incoming flow file is needed to provide it; if there is no incoming connection either, fail initialization
+ if (!context.getProperty(HIVEQL_SELECT_QUERY).isSet() && !context.hasIncomingConnection()) {
+ final String errorString = "Either the Select Query must be specified or there must be an incoming connection "
+ + "providing flowfile(s) containing a SQL select query";
+ getLogger().error(errorString);
+ throw new ProcessException(errorString);
+ }
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
+ PartialFunctions.onTrigger(context, sessionFactory, getLogger(), session -> onTrigger(context, session));
+ }
+
+ private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
+ FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
+ FlowFile flowfile = null;
+
+ // If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
+ // However, if we have no FlowFile and we have connections coming from other Processors, then
+ // we know that we should run only if we have a FlowFile.
+ if (context.hasIncomingConnection()) {
+ if (fileToProcess == null && context.hasNonLoopConnection()) {
+ return;
+ }
+ }
+
+ final ComponentLog logger = getLogger();
+ final Hive3DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive3DBCPService.class);
+ final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
+
+ final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());
+
+ // Source the SQL
+ final String selectQuery;
+
+ if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
+ selectQuery = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
+ } else {
+ // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
+ // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
+ final StringBuilder queryContents = new StringBuilder();
+ session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
+ selectQuery = queryContents.toString();
+ }
+
+
+ final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
+ final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger();
+ final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet()
+ ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger()
+ : 0;
+ final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
+ final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
+ final StopWatch stopWatch = new StopWatch(true);
+ final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
+ final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue();
+ final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue();
+ final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
+ final boolean escape = context.getProperty(HIVEQL_CSV_ESCAPE).asBoolean();
+ final String fragmentIdentifier = UUID.randomUUID().toString();
+
+ try (final Connection con = dbcpService.getConnection();
+ final Statement st = (flowbased ? con.prepareStatement(selectQuery) : con.createStatement())
+ ) {
+
+ st.setQueryTimeout(context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asInteger());
+
+ if (fetchSize != null && fetchSize > 0) {
+ try {
+ st.setFetchSize(fetchSize);
+ } catch (SQLException se) {
+ // Not all drivers support this, just log the error (at debug level) and move on
+ logger.debug("Cannot set fetch size to {} due to {}", new Object[]{fetchSize, se.getLocalizedMessage()}, se);
+ }
+ }
+
+ final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
+ try {
+ logger.debug("Executing query {}", new Object[]{selectQuery});
+ if (flowbased) {
+ // Hive JDBC doesn't support this yet:
+ // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
+ // int paramCount = pmd.getParameterCount();
+
+ // Alternate way to determine number of params in SQL.
+ int paramCount = StringUtils.countMatches(selectQuery, "?");
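+ // For example, "SELECT * FROM mytable WHERE a = ? AND b = ?" yields paramCount = 2;
+ // setParameters then fills the PreparedStatement from FlowFile attributes
+ // (hiveql.args.N.type / hiveql.args.N.value, per the Hive processors' convention).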
+
+ if (paramCount > 0) {
+ setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
+ }
+ }
+
+ final ResultSet resultSet;
+
+ try {
+ resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(selectQuery));
+ } catch (SQLException se) {
+ // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
+ flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
+ fileToProcess = null;
+ throw se;
+ }
+
+ int fragmentIndex = 0;
+ String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null;
+ while (true) {
+ final AtomicLong nrOfRows = new AtomicLong(0L);
+ flowfile = (flowfile == null) ? session.create() : session.create(flowfile);
+ if (baseFilename == null) {
+ baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
+ }
+ try {
+ flowfile = session.write(flowfile, out -> {
+ try {
+ if (AVRO.equals(outputFormat)) {
+ nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro));
+ } else if (CSV.equals(outputFormat)) {
+ CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile);
+ nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
+ } else {
+ nrOfRows.set(0L);
+ throw new ProcessException("Unsupported output format: " + outputFormat);
+ }
+ } catch (final SQLException | RuntimeException e) {
+ throw new ProcessException("Error during database query or conversion of records.", e);
+ }
+ });
+ } catch (ProcessException e) {
+ // Add flowfile to results before rethrowing so it will be removed from session in outer catch
+ resultSetFlowFiles.add(flowfile);
+ throw e;
+ }
+
+ if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
+ final Map<String, String> attributes = new HashMap<>();
+ // Set attribute for how many rows were selected
+ attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
+
+ try {
+ // Set input/output table names by parsing the query
+ attributes.putAll(toQueryTableAttributes(findTableNames(selectQuery)));
+ } catch (Exception e) {
+ // If failed to parse the query, just log a warning message, but continue.
+ getLogger().warn("Failed to parse query: {} due to {}", new Object[]{selectQuery, e}, e);
+ }
+
+ // Set MIME type on output document and add extension to filename
+ if (AVRO.equals(outputFormat)) {
+ attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
+ attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro");
+ } else if (CSV.equals(outputFormat)) {
+ attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
+ attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv");
+ }
+
+ if (maxRowsPerFlowFile > 0) {
+ attributes.put("fragment.identifier", fragmentIdentifier);
+ attributes.put("fragment.index", String.valueOf(fragmentIndex));
+ }
+
+ flowfile = session.putAllAttributes(flowfile, attributes);
+
+ logger.info("{} contains {} Avro records; transferring to 'success'",
+ new Object[]{flowfile, nrOfRows.get()});
+
+ if (context.hasIncomingConnection()) {
+ // If the flow file came from an incoming connection, issue a Modify Content provenance event
+
+ session.getProvenanceReporter().modifyContent(flowfile, "Retrieved " + nrOfRows.get() + " rows",
+ stopWatch.getElapsed(TimeUnit.MILLISECONDS));
+ } else {
+ // If we created a flow file from rows received from Hive, issue a Receive provenance event
+ session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
+ }
+ resultSetFlowFiles.add(flowfile);
+ } else {
+ // If there were no rows returned and the first flow file has been sent, we're done processing, so remove the flowfile and carry on
+ session.remove(flowfile);
+ break;
+ }
+
+ fragmentIndex++;
+ if (maxFragments > 0 && fragmentIndex >= maxFragments) {
+ break;
+ }
+ }
+
+ for (int i = 0; i < resultSetFlowFiles.size(); i++) {
+ // Set count on all FlowFiles
+ if (maxRowsPerFlowFile > 0) {
+ resultSetFlowFiles.set(i,
+ session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
+ }
+ }
+
+ } catch (final SQLException e) {
+ throw e;
+ }
+
+ session.transfer(resultSetFlowFiles, REL_SUCCESS);
+
+ } catch (final ProcessException | SQLException e) {
+ logger.error("Issue processing SQL {} due to {}.", new Object[]{selectQuery, e});
+ if (flowfile == null) {
+ // This can happen if any exceptions occur while setting up the connection, statement, etc.
+ logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure",
+ new Object[]{selectQuery, e});
+ context.yield();
+ } else {
+ if (context.hasIncomingConnection()) {
+ logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure",
+ new Object[]{selectQuery, flowfile, e});
+ flowfile = session.penalize(flowfile);
+ } else {
+ logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure",
+ new Object[]{selectQuery, e});
+ context.yield();
+ }
+ session.transfer(flowfile, REL_FAILURE);
+ }
+ } finally {
+ if (fileToProcess != null) {
+ session.remove(fileToProcess);
+ }
+ }
+ }
+}
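The fragment.* attributes written above let a downstream step reassemble the FlowFiles produced from a single result set. A minimal sketch of such a consumer, assuming a flowFiles collection is in scope (illustrative only, not part of this commit):

    // Group SelectHive3QL output by query (fragment.identifier), then order by fragment.index.
    // Note: the fragment.* attributes are only set when 'Max Rows Per Flow File' is non-zero.
    Map<String, List<FlowFile>> byQuery = new HashMap<>();
    for (FlowFile ff : flowFiles) {
        byQuery.computeIfAbsent(ff.getAttribute("fragment.identifier"), k -> new ArrayList<>()).add(ff);
    }
    for (List<FlowFile> fragments : byQuery.values()) {
        fragments.sort(Comparator.comparingInt(f -> Integer.parseInt(f.getAttribute("fragment.index"))));
    }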
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/orc/PutORC.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/orc/PutORC.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/orc/PutORC.java
new file mode 100644
index 0000000..a0a5d13
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/orc/PutORC.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.orc;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
+import org.apache.hadoop.hive.ql.io.orc.Writer;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.ReadsAttribute;
+import org.apache.nifi.annotation.behavior.Restricted;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.avro.AvroTypeUtil;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.expression.ExpressionLanguageScope;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.DataUnit;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.processors.hadoop.AbstractPutHDFSRecord;
+import org.apache.nifi.processors.hadoop.record.HDFSRecordWriter;
+import org.apache.nifi.processors.orc.record.ORCHDFSRecordWriter;
+import org.apache.nifi.schema.access.SchemaNotFoundException;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.util.hive.HiveUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@Tags({"put", "ORC", "hadoop", "HDFS", "filesystem", "restricted", "record"})
+@CapabilityDescription("Reads records from an incoming FlowFile using the provided Record Reader, and writes those records " +
+ "to a ORC file in the location/filesystem specified in the configuration.")
+@ReadsAttribute(attribute = "filename", description = "The name of the file to write comes from the value of this attribute.")
+@WritesAttributes({
+ @WritesAttribute(attribute = "filename", description = "The name of the file is stored in this attribute."),
+ @WritesAttribute(attribute = "absolute.hdfs.path", description = "The absolute path to the file is stored in this attribute."),
+ @WritesAttribute(attribute = "record.count", description = "The number of records written to the ORC file"),
+ @WritesAttribute(attribute = "hive.ddl", description = "Creates a partial Hive DDL statement for creating an external table in Hive from the destination folder. "
+ + "This can be used in ReplaceText for setting the content to the DDL. To make it valid DDL, add \"LOCATION '<path_to_orc_file_in_hdfs>'\", where "
+ + "the path is the directory that contains this ORC file on HDFS. For example, this processor can send flow files downstream to ReplaceText to set the content "
+ + "to this DDL (plus the LOCATION clause as described), then to PutHiveQL processor to create the table if it doesn't exist.")
+})
+@Restricted("Provides operator the ability to write to any file that NiFi has access to in HDFS or the local filesystem.")
+public class PutORC extends AbstractPutHDFSRecord {
+
+ public static final String HIVE_DDL_ATTRIBUTE = "hive.ddl";
+
+ public static final PropertyDescriptor ORC_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder()
+ .name("putorc-config-resources")
+ .displayName("ORC Configuration Resources")
+ .description("A file or comma separated list of files which contains the ORC configuration (hive-site.xml, e.g.). Without this, Hadoop "
+ + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Please see the ORC documentation for more details.")
+ .required(false).addValidator(HiveUtils.createMultipleFilesExistValidator()).build();
+
+ public static final PropertyDescriptor STRIPE_SIZE = new PropertyDescriptor.Builder()
+ .name("putorc-stripe-size")
+ .displayName("Stripe Size")
+ .description("The size of the memory buffer (in bytes) for writing stripes to an ORC file")
+ .required(true)
+ .addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
+ .defaultValue("64 MB")
+ .build();
+
+ public static final PropertyDescriptor BUFFER_SIZE = new PropertyDescriptor.Builder()
+ .name("putorc-buffer-size")
+ .displayName("Buffer Size")
+ .description("The maximum size of the memory buffers (in bytes) used for compressing and storing a stripe in memory. This is a hint to the ORC writer, "
+ + "which may choose to use a smaller buffer size based on stripe size and number of columns for efficient stripe writing and memory utilization.")
+ .required(true)
+ .addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
+ .defaultValue("10 KB")
+ .build();
+
+ static final PropertyDescriptor HIVE_TABLE_NAME = new PropertyDescriptor.Builder()
+ .name("putorc-hive-table-name")
+ .displayName("Hive Table Name")
+ .description("An optional table name to insert into the hive.ddl attribute. The generated DDL can be used by "
+ + "a PutHive3QL processor (presumably after a PutHDFS processor) to create a table backed by the converted ORC file. "
+ + "If this property is not provided, the full name (including namespace) of the incoming Avro record will be normalized "
+ + "and used as the table name.")
+ .required(false)
+ .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor HIVE_FIELD_NAMES = new PropertyDescriptor.Builder()
+ .name("putorc-hive-field-names")
+ .displayName("Normalize Field Names for Hive")
+ .description("Whether to normalize field names for Hive (force lowercase, e.g.). If the ORC file is going to "
+ + "be part of a Hive table, this property should be set to true. To preserve the original field names from the "
+ + "schema, this property should be set to false.")
+ .required(true)
+ .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
+ .allowableValues("true", "false")
+ .defaultValue("true")
+ .build();
+
+
+ public static final List<AllowableValue> COMPRESSION_TYPES;
+
+ static {
+ final List<AllowableValue> compressionTypes = new ArrayList<>();
+ compressionTypes.add(new AllowableValue("NONE", "NONE", "No compression"));
+ compressionTypes.add(new AllowableValue("ZLIB", "ZLIB", "ZLIB compression"));
+ compressionTypes.add(new AllowableValue("SNAPPY", "SNAPPY", "Snappy compression"));
+ compressionTypes.add(new AllowableValue("LZO", "LZO", "LZO compression"));
+ COMPRESSION_TYPES = Collections.unmodifiableList(compressionTypes);
+ }
+
+ @Override
+ public List<AllowableValue> getCompressionTypes(final ProcessorInitializationContext context) {
+ return COMPRESSION_TYPES;
+ }
+
+ @Override
+ public String getDefaultCompressionType(final ProcessorInitializationContext context) {
+ return "NONE";
+ }
+
+ @Override
+ public List<PropertyDescriptor> getAdditionalProperties() {
+ final List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
+ _propertyDescriptors.add(ORC_CONFIGURATION_RESOURCES);
+ _propertyDescriptors.add(STRIPE_SIZE);
+ _propertyDescriptors.add(BUFFER_SIZE);
+ _propertyDescriptors.add(HIVE_TABLE_NAME);
+ _propertyDescriptors.add(HIVE_FIELD_NAMES);
+ return Collections.unmodifiableList(_propertyDescriptors);
+ }
+
+ @Override
+ public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile, final Configuration conf, final Path path, final RecordSchema schema)
+ throws IOException, SchemaNotFoundException {
+
+ final Schema avroSchema = AvroTypeUtil.extractAvroSchema(schema);
+
+ final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
+ final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
+ final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());
+ final boolean normalizeForHive = context.getProperty(HIVE_FIELD_NAMES).asBoolean();
+ TypeInfo orcSchema = NiFiOrcUtils.getOrcField(avroSchema, normalizeForHive);
+ final Writer orcWriter = NiFiOrcUtils.createWriter(path, conf, orcSchema, stripeSize, compressionType, bufferSize);
+ final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet()
+ ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue()
+ : NiFiOrcUtils.normalizeHiveTableName(avroSchema.getFullName());
+
+ return new ORCHDFSRecordWriter(orcWriter, avroSchema, hiveTableName, normalizeForHive);
+ }
+}
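As the hive.ddl attribute description above explains, the generated DDL is partial and still needs a LOCATION clause. A small sketch of completing it downstream (the attribute lookup and HDFS directory are assumed inputs, not part of this commit):

    // hive.ddl holds e.g. "CREATE EXTERNAL TABLE IF NOT EXISTS my_table (id INT, name STRING) STORED AS ORC"
    String partialDdl = flowFile.getAttribute(PutORC.HIVE_DDL_ATTRIBUTE);
    String completedDdl = partialDdl + " LOCATION '/path/in/hdfs/my_table'";
    // completedDdl can then replace the FlowFile content (e.g., via ReplaceText) and be sent to PutHive3QL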
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/orc/record/ORCHDFSRecordWriter.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/orc/record/ORCHDFSRecordWriter.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/orc/record/ORCHDFSRecordWriter.java
new file mode 100644
index 0000000..bd386a0
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/processors/orc/record/ORCHDFSRecordWriter.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.orc.record;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
+import org.apache.hadoop.hive.ql.io.orc.Writer;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.nifi.processors.hadoop.record.HDFSRecordWriter;
+import org.apache.nifi.serialization.WriteResult;
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.serialization.record.RecordSet;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.nifi.processors.orc.PutORC.HIVE_DDL_ATTRIBUTE;
+
+/**
+ * HDFSRecordWriter that writes ORC files using Avro as the schema representation.
+ */
+public class ORCHDFSRecordWriter implements HDFSRecordWriter {
+
+ private final Schema avroSchema;
+ private final TypeInfo orcSchema;
+ private final Writer orcWriter;
+ private final String hiveTableName;
+ private final boolean hiveFieldNames;
+ private final List<Schema.Field> recordFields;
+ private final int numRecordFields;
+ private Object[] workingRow;
+
+ public ORCHDFSRecordWriter(final Writer orcWriter, final Schema avroSchema, final String hiveTableName, final boolean hiveFieldNames) {
+ this.avroSchema = avroSchema;
+ this.orcWriter = orcWriter;
+ this.hiveFieldNames = hiveFieldNames;
+ this.orcSchema = NiFiOrcUtils.getOrcField(avroSchema, this.hiveFieldNames);
+ this.hiveTableName = hiveTableName;
+ this.recordFields = avroSchema != null ? avroSchema.getFields() : null;
+ this.numRecordFields = recordFields != null ? recordFields.size() : -1;
+ // Reuse row object
+ this.workingRow = numRecordFields > -1 ? new Object[numRecordFields] : null;
+ }
+
+ @Override
+ public void write(final Record record) throws IOException {
+ if (recordFields != null) {
+ for (int i = 0; i < numRecordFields; i++) {
+ final Schema.Field field = recordFields.get(i);
+ final Schema fieldSchema = field.schema();
+ final String fieldName = field.name();
+ Object o = record.getValue(fieldName);
+ try {
+ workingRow[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema, hiveFieldNames), o, hiveFieldNames);
+ } catch (ArrayIndexOutOfBoundsException aioobe) {
+ final String errorMsg = "Index out of bounds for column " + i + ", field " + fieldName + ", and object " + o;
+ throw new IOException(errorMsg, aioobe);
+ }
+ }
+ orcWriter.addRow(NiFiOrcUtils.createOrcStruct(orcSchema, workingRow));
+ }
+ }
+
+ /**
+ * @param recordSet the RecordSet to write
+ * @return the result of writing the record set
+ * @throws IOException if an I/O error happens reading from the RecordSet, or writing a Record
+ */
+ public WriteResult write(final RecordSet recordSet) throws IOException {
+ int recordCount = 0;
+
+ Record record;
+ while ((record = recordSet.next()) != null) {
+ write(record);
+ recordCount++;
+ }
+
+ // Add Hive DDL Attribute
+ String hiveDDL = NiFiOrcUtils.generateHiveDDL(avroSchema, hiveTableName, hiveFieldNames);
+ Map<String, String> attributes = new HashMap<>();
+ attributes.put(HIVE_DDL_ATTRIBUTE, hiveDDL);
+
+ return WriteResult.of(recordCount, attributes);
+ }
+
+ @Override
+ public void close() throws IOException {
+ orcWriter.close();
+ }
+
+}
+
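A brief usage sketch for this writer, assuming the ORC Writer, Avro Schema, and RecordSet have already been created (variable names are illustrative):

    ORCHDFSRecordWriter recordWriter = new ORCHDFSRecordWriter(orcWriter, avroSchema, "my_table", true);
    try {
        WriteResult result = recordWriter.write(recordSet);   // writes and counts all records
        String ddl = result.getAttributes().get("hive.ddl");  // partial DDL generated from the Avro schema
    } finally {
        recordWriter.close();                                 // closes the underlying ORC writer
    }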
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java
new file mode 100644
index 0000000..70cc6c1
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.util.hive;
+
+public class AuthenticationFailedException extends Exception {
+ public AuthenticationFailedException(String reason, Exception cause) {
+ super(reason, cause);
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java
new file mode 100644
index 0000000..3688912
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.util.hive;
+
+public class CsvOutputOptions {
+
+ private boolean header = true;
+ private String altHeader = null;
+ private String delimiter = ",";
+ private boolean quote = false;
+ private boolean escape = true;
+
+ private int maxRowsPerFlowFile = 0;
+
+ public boolean isHeader() {
+ return header;
+ }
+
+ public String getAltHeader() {
+ return altHeader;
+ }
+
+
+ public String getDelimiter() {
+ return delimiter;
+ }
+
+
+ public boolean isQuote() {
+ return quote;
+ }
+
+ public boolean isEscape() {
+ return escape;
+ }
+
+ public int getMaxRowsPerFlowFile() {
+ return maxRowsPerFlowFile;
+ }
+
+ public CsvOutputOptions(boolean header, String altHeader, String delimiter, boolean quote, boolean escape, int maxRowsPerFlowFile) {
+ this.header = header;
+ this.altHeader = altHeader;
+ this.delimiter = delimiter;
+ this.quote = quote;
+ this.escape = escape;
+ this.maxRowsPerFlowFile = maxRowsPerFlowFile;
+ }
+}
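For reference, constructing options that match the SelectHive3QL defaults (header on, no alternate header, comma delimiter, quoting and escaping enabled, no row cap) would look like:

    CsvOutputOptions options = new CsvOutputOptions(true, null, ",", true, true, 0);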
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java
new file mode 100644
index 0000000..6d53683
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.util.hive;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.hadoop.KerberosProperties;
+import org.apache.nifi.hadoop.SecurityUtil;
+import org.apache.nifi.logging.ComponentLog;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * Helper class for loading Hive configuration resources and performing Kerberos authentication for Hive components.
+ */
+public class HiveConfigurator {
+
+ public Collection<ValidationResult> validate(String configFiles, String principal, String keyTab, AtomicReference<ValidationResources> validationResourceHolder, ComponentLog log) {
+
+ final List<ValidationResult> problems = new ArrayList<>();
+ ValidationResources resources = validationResourceHolder.get();
+
+ // if no resources in the holder, or if the holder has different resources loaded,
+ // then load the Configuration and set the new resources in the holder
+ if (resources == null || !configFiles.equals(resources.getConfigResources())) {
+ log.debug("Reloading validation resources");
+ resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles));
+ validationResourceHolder.set(resources);
+ }
+
+ final Configuration hiveConfig = resources.getConfiguration();
+
+ problems.addAll(KerberosProperties.validatePrincipalAndKeytab(this.getClass().getSimpleName(), hiveConfig, principal, keyTab, log));
+
+ return problems;
+ }
+
+ public HiveConf getConfigurationFromFiles(final String configFiles) {
+ final HiveConf hiveConfig = new HiveConf();
+ if (StringUtils.isNotBlank(configFiles)) {
+ for (final String configFile : configFiles.split(",")) {
+ hiveConfig.addResource(new Path(configFile.trim()));
+ }
+ }
+ return hiveConfig;
+ }
+
+ public void preload(Configuration configuration) {
+ try {
+ FileSystem.get(configuration).close();
+ UserGroupInformation.setConfiguration(configuration);
+ } catch (IOException ioe) {
+ // Suppress the exception; any future use of this configuration will fail and surface the underlying problem
+ }
+ }
+
+ /**
+ * As of Apache NiFi 1.5.0, due to changes made to
+ * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this
+ * class to authenticate a principal with Kerberos, Hive controller services no longer
+ * attempt relogins explicitly. For more information, please read the documentation for
+ * {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
+ * <p/>
+ * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by
+ * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive
+ * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions
+ * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same
+ * {@link UserGroupInformation} instance. One of these threads could leave the
+ * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state
+ * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed
+ * authentication attempts that would leave the Hive controller service in an unrecoverable state.
+ *
+ * @see SecurityUtil#loginKerberos(Configuration, String, String)
+ */
+ public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab) throws AuthenticationFailedException {
+ UserGroupInformation ugi;
+ try {
+ ugi = SecurityUtil.loginKerberos(hiveConfig, principal, keyTab);
+ } catch (IOException ioe) {
+ throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe);
+ }
+ return ugi;
+ }
+
+ /**
+ * As of Apache NiFi 1.5.0, this method has been deprecated and is now a wrapper
+ * method which invokes {@link HiveConfigurator#authenticate(Configuration, String, String)}. It will no longer start a
+ * {@link org.apache.nifi.hadoop.KerberosTicketRenewer} to perform explicit relogins.
+ *
+ * @see HiveConfigurator#authenticate(Configuration, String, String)
+ */
+ @Deprecated
+ public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab, long ticketRenewalPeriod) throws AuthenticationFailedException {
+ return authenticate(hiveConfig, principal, keyTab);
+ }
+}
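A short usage sketch for the configurator above; the paths and principal below are placeholders, and AuthenticationFailedException must be handled by the caller:

    HiveConfigurator hiveConfigurator = new HiveConfigurator();
    HiveConf hiveConf = hiveConfigurator.getConfigurationFromFiles("/etc/hive/conf/hive-site.xml");
    UserGroupInformation ugi = hiveConfigurator.authenticate(hiveConf, "nifi@EXAMPLE.COM", "/etc/security/keytabs/nifi.keytab");
    // Hive work can then be performed on behalf of the principal via ugi.doAs(...)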
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java
new file mode 100644
index 0000000..ff06495
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java
@@ -0,0 +1,450 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.util.hive;
+
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
+import org.apache.avro.SchemaBuilder.FieldAssembler;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.nifi.components.PropertyDescriptor;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import static java.sql.Types.ARRAY;
+import static java.sql.Types.BIGINT;
+import static java.sql.Types.BINARY;
+import static java.sql.Types.BIT;
+import static java.sql.Types.BLOB;
+import static java.sql.Types.BOOLEAN;
+import static java.sql.Types.CHAR;
+import static java.sql.Types.CLOB;
+import static java.sql.Types.DATE;
+import static java.sql.Types.DECIMAL;
+import static java.sql.Types.DOUBLE;
+import static java.sql.Types.FLOAT;
+import static java.sql.Types.INTEGER;
+import static java.sql.Types.JAVA_OBJECT;
+import static java.sql.Types.LONGNVARCHAR;
+import static java.sql.Types.LONGVARBINARY;
+import static java.sql.Types.LONGVARCHAR;
+import static java.sql.Types.NCHAR;
+import static java.sql.Types.NUMERIC;
+import static java.sql.Types.NVARCHAR;
+import static java.sql.Types.REAL;
+import static java.sql.Types.ROWID;
+import static java.sql.Types.SMALLINT;
+import static java.sql.Types.STRUCT;
+import static java.sql.Types.TIME;
+import static java.sql.Types.TIMESTAMP;
+import static java.sql.Types.TINYINT;
+import static java.sql.Types.VARBINARY;
+import static java.sql.Types.VARCHAR;
+
+/**
+ * JDBC / HiveQL common functions.
+ */
+public class HiveJdbcCommon {
+
+ public static final String AVRO = "Avro";
+ public static final String CSV = "CSV";
+
+ public static final String MIME_TYPE_AVRO_BINARY = "application/avro-binary";
+ public static final String CSV_MIME_TYPE = "text/csv";
+
+
+ public static final PropertyDescriptor NORMALIZE_NAMES_FOR_AVRO = new PropertyDescriptor.Builder()
+ .name("hive-normalize-avro")
+ .displayName("Normalize Table/Column Names")
+ .description("Whether to change non-Avro-compatible characters in column names to Avro-compatible characters. For example, colons and periods "
+ + "will be changed to underscores in order to build a valid Avro record.")
+ .allowableValues("true", "false")
+ .defaultValue("false")
+ .required(true)
+ .build();
+
+ public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final int maxRows, boolean convertNames) throws SQLException, IOException {
+ return convertToAvroStream(rs, outStream, null, maxRows, convertNames, null);
+ }
+
+
+ public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, String recordName, final int maxRows, boolean convertNames, ResultSetRowCallback callback)
+ throws SQLException, IOException {
+ final Schema schema = createSchema(rs, recordName, convertNames);
+ final GenericRecord rec = new GenericData.Record(schema);
+
+ final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
+ try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
+ dataFileWriter.create(schema, outStream);
+
+ final ResultSetMetaData meta = rs.getMetaData();
+ final int nrOfColumns = meta.getColumnCount();
+ long nrOfRows = 0;
+ while (rs.next()) {
+ if (callback != null) {
+ callback.processRow(rs);
+ }
+ for (int i = 1; i <= nrOfColumns; i++) {
+ final int javaSqlType = meta.getColumnType(i);
+ Object value = rs.getObject(i);
+
+ if (value == null) {
+ rec.put(i - 1, null);
+
+ } else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == BLOB || javaSqlType == CLOB) {
+ // bytes require slightly different handling
+ ByteBuffer bb = null;
+ if (value instanceof byte[]) {
+ bb = ByteBuffer.wrap((byte[]) value);
+ } else if (value instanceof ByteBuffer) {
+ bb = (ByteBuffer) value;
+ }
+ if (bb != null) {
+ rec.put(i - 1, bb);
+ } else {
+ throw new IOException("Could not process binary object of type " + value.getClass().getName());
+ }
+
+ } else if (value instanceof Byte) {
+ // The tinyint(1) type is reported by the JDBC driver as java.sql.Types.TINYINT,
+ // but the value is returned as java.lang.Byte (at least with the H2 JDBC driver).
+ // Putting a Byte directly into the Avro record results in:
+ // org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte
+ rec.put(i - 1, ((Byte) value).intValue());
+
+ } else if (value instanceof BigDecimal || value instanceof BigInteger) {
+ // Avro can't handle BigDecimal and BigInteger as numbers - it will throw an AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38"
+ rec.put(i - 1, value.toString());
+
+ } else if (value instanceof Number) {
+ // Need to call the right getXYZ() method (instead of the getObject() method above), since Doubles are sometimes returned
+ // when the JDBC type is 6 (Float) for example.
+ if (javaSqlType == FLOAT) {
+ value = rs.getFloat(i);
+ } else if (javaSqlType == DOUBLE) {
+ value = rs.getDouble(i);
+ } else if (javaSqlType == INTEGER || javaSqlType == TINYINT || javaSqlType == SMALLINT) {
+ value = rs.getInt(i);
+ }
+
+ rec.put(i - 1, value);
+
+ } else if (value instanceof Boolean) {
+ rec.put(i - 1, value);
+ } else {
+ // The different types that we support are numbers (int, long, double, float),
+ // as well as boolean values and Strings. Since Avro doesn't provide
+ // timestamp types, we want to convert those to Strings. So we will cast anything other
+ // than numbers or booleans to strings by using the toString() method.
+ rec.put(i - 1, value.toString());
+ }
+ }
+ dataFileWriter.append(rec);
+ nrOfRows += 1;
+
+ if (maxRows > 0 && nrOfRows == maxRows)
+ break;
+ }
+
+ return nrOfRows;
+ }
+ }
+
+ public static Schema createSchema(final ResultSet rs, boolean convertNames) throws SQLException {
+ return createSchema(rs, null, convertNames);
+ }
+
+ /**
+ * Creates an Avro schema from a result set. If the table/record name is known a priori and provided, use that as a
+ * fallback for the record name if it cannot be retrieved from the result set, and finally fall back to a default value.
+ *
+ * @param rs The result set to convert to Avro
+ * @param recordName The a priori record name to use if it cannot be determined from the result set.
+ * @param convertNames Whether to convert column/table names to be legal Avro names
+ * @return A Schema object representing the result set converted to an Avro record
+ * @throws SQLException if any error occurs during conversion
+ */
+ public static Schema createSchema(final ResultSet rs, String recordName, boolean convertNames) throws SQLException {
+ final ResultSetMetaData meta = rs.getMetaData();
+ final int nrOfColumns = meta.getColumnCount();
+ String tableName = StringUtils.isEmpty(recordName) ? "NiFi_SelectHiveQL_Record" : recordName;
+ try {
+ if (nrOfColumns > 0) {
+ // Hive JDBC doesn't support getTableName, instead it returns table.column for column name. Grab the table name from the first column
+ String firstColumnNameFromMeta = meta.getColumnName(1);
+ int tableNameDelimiter = firstColumnNameFromMeta.lastIndexOf(".");
+ if (tableNameDelimiter > -1) {
+ String tableNameFromMeta = firstColumnNameFromMeta.substring(0, tableNameDelimiter);
+ if (!StringUtils.isBlank(tableNameFromMeta)) {
+ tableName = tableNameFromMeta;
+ }
+ }
+ }
+ } catch (SQLException se) {
+ // Not all drivers support getTableName, so just use the previously-set default
+ }
+
+ if (convertNames) {
+ tableName = normalizeNameForAvro(tableName);
+ }
+ final FieldAssembler<Schema> builder = SchemaBuilder.record(tableName).namespace("any.data").fields();
+
+ // Some Avro types are missing direct mappings (Decimal and Date types, for example) and may need additional work.
+ for (int i = 1; i <= nrOfColumns; i++) {
+ String columnNameFromMeta = meta.getColumnName(i);
+ // Hive returns table.column for column name. Grab the column name as the string after the last period
+ int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
+ String columnName = columnNameFromMeta.substring(columnNameDelimiter + 1);
+ switch (meta.getColumnType(i)) {
+ case CHAR:
+ case LONGNVARCHAR:
+ case LONGVARCHAR:
+ case NCHAR:
+ case NVARCHAR:
+ case VARCHAR:
+ case ARRAY:
+ case STRUCT:
+ case JAVA_OBJECT:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
+ break;
+
+ case BIT:
+ case BOOLEAN:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
+ break;
+
+ case INTEGER:
+ // Default to signed type unless otherwise noted. Some JDBC drivers don't implement isSigned()
+ boolean signedType = true;
+ try {
+ signedType = meta.isSigned(i);
+ } catch (SQLException se) {
+ // Use signed types as default
+ }
+ if (signedType) {
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
+ } else {
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
+ }
+ break;
+
+ case SMALLINT:
+ case TINYINT:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
+ break;
+
+ case BIGINT:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
+ break;
+
+ // java.sql.RowId is an interface whose implementation is database-specific,
+ // so convert it to String
+ case ROWID:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
+ break;
+
+ case FLOAT:
+ case REAL:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().floatType().endUnion().noDefault();
+ break;
+
+ case DOUBLE:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().doubleType().endUnion().noDefault();
+ break;
+
+ // No direct suitable Avro type; represent as String for now
+ case DECIMAL:
+ case NUMERIC:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
+ break;
+
+ // No direct suitable Avro type; represent as String for now
+ case DATE:
+ case TIME:
+ case TIMESTAMP:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
+ break;
+
+ case BINARY:
+ case VARBINARY:
+ case LONGVARBINARY:
+ case BLOB:
+ case CLOB:
+ builder.name(columnName).type().unionOf().nullBuilder().endNull().and().bytesType().endUnion().noDefault();
+ break;
+
+
+ default:
+ throw new IllegalArgumentException("createSchema: Unknown SQL type " + meta.getColumnType(i) + " cannot be converted to Avro type");
+ }
+ }
+
+ return builder.endRecord();
+ }
+
+ public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, CsvOutputOptions outputOptions) throws SQLException, IOException {
+ return convertToCsvStream(rs, outStream, null, null, outputOptions);
+ }
+
+ public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, String recordName, ResultSetRowCallback callback, CsvOutputOptions outputOptions)
+ throws SQLException, IOException {
+
+ final ResultSetMetaData meta = rs.getMetaData();
+ final int nrOfColumns = meta.getColumnCount();
+ List<String> columnNames = new ArrayList<>(nrOfColumns);
+
+ if (outputOptions.isHeader()) {
+ if (outputOptions.getAltHeader() == null) {
+ for (int i = 1; i <= nrOfColumns; i++) {
+ String columnNameFromMeta = meta.getColumnName(i);
+ // Hive returns table.column for column name. Grab the column name as the string after the last period
+ int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
+ columnNames.add(columnNameFromMeta.substring(columnNameDelimiter + 1));
+ }
+ } else {
+ String[] altHeaderNames = outputOptions.getAltHeader().split(",");
+ columnNames = Arrays.asList(altHeaderNames);
+ }
+ }
+
+ // Write column names as the header row (columnNames is only populated when a header was requested)
+ if (outputOptions.isHeader()) {
+ outStream.write(StringUtils.join(columnNames, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
+ outStream.write("\n".getBytes(StandardCharsets.UTF_8));
+ }
+
+ // Iterate over the rows
+ int maxRows = outputOptions.getMaxRowsPerFlowFile();
+ long nrOfRows = 0;
+ while (rs.next()) {
+ if (callback != null) {
+ callback.processRow(rs);
+ }
+ List<String> rowValues = new ArrayList<>(nrOfColumns);
+ for (int i = 1; i <= nrOfColumns; i++) {
+ final int javaSqlType = meta.getColumnType(i);
+ final Object value = rs.getObject(i);
+
+ switch (javaSqlType) {
+ case CHAR:
+ case LONGNVARCHAR:
+ case LONGVARCHAR:
+ case NCHAR:
+ case NVARCHAR:
+ case VARCHAR:
+ String valueString = rs.getString(i);
+ if (valueString != null) {
+ // No extra quotes are added around escaped values, since escapeCsv supplies them itself when required.
+ StringBuilder sb = new StringBuilder();
+ if (outputOptions.isQuote()) {
+ sb.append("\"");
+ if (outputOptions.isEscape()) {
+ sb.append(StringEscapeUtils.escapeCsv(valueString));
+ } else {
+ sb.append(valueString);
+ }
+ sb.append("\"");
+ rowValues.add(sb.toString());
+ } else {
+ if (outputOptions.isEscape()) {
+ rowValues.add(StringEscapeUtils.escapeCsv(valueString));
+ } else {
+ rowValues.add(valueString);
+ }
+ }
+ } else {
+ rowValues.add("");
+ }
+ break;
+ case ARRAY:
+ case STRUCT:
+ case JAVA_OBJECT:
+ String complexValueString = rs.getString(i);
+ if (complexValueString != null) {
+ rowValues.add(StringEscapeUtils.escapeCsv(complexValueString));
+ } else {
+ rowValues.add("");
+ }
+ break;
+ default:
+ if (value != null) {
+ rowValues.add(value.toString());
+ } else {
+ rowValues.add("");
+ }
+ }
+ }
+ // Write row values
+ outStream.write(StringUtils.join(rowValues, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
+ outStream.write("\n".getBytes(StandardCharsets.UTF_8));
+ nrOfRows++;
+
+ if (maxRows > 0 && nrOfRows == maxRows) {
+ break;
+ }
+ }
+ return nrOfRows;
+ }
+
+ public static String normalizeNameForAvro(String inputName) {
+ String normalizedName = inputName.replaceAll("[^A-Za-z0-9_]", "_");
+ if (Character.isDigit(normalizedName.charAt(0))) {
+ normalizedName = "_" + normalizedName;
+ }
+ return normalizedName;
+ }
+
+ /**
+ * An interface for callback methods which allow processing of a row during the convertToXYZStream() processing.
+ * <b>IMPORTANT:</b> The callback should only work on the row pointed at by the current ResultSet reference.
+ * Advancing the cursor (e.g. by calling ResultSet.next()) can cause rows to be skipped during Avro transformation.
+ */
+ public interface ResultSetRowCallback {
+ void processRow(ResultSet resultSet) throws IOException;
+ }
+
+ public static Configuration getConfigurationFromFiles(final String configFiles) {
+ final Configuration hiveConfig = new HiveConf();
+ if (StringUtils.isNotBlank(configFiles)) {
+ for (final String configFile : configFiles.split(",")) {
+ hiveConfig.addResource(new Path(configFile.trim()));
+ }
+ }
+ return hiveConfig;
+ }
+}
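For context, a minimal sketch of how convertToCsvStream() and the ResultSetRowCallback hook above might be driven by a caller. The enclosing utility class name (HiveJdbcCommon), the JDBC URL and query, and the CsvOutputOptions constructor signature (header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile) are assumptions for illustration, not part of this commit:

import java.io.FileOutputStream;
import java.io.OutputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class CsvExportSketch {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default"); // hypothetical URL
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT * FROM web_logs");                            // hypothetical query
             OutputStream out = new FileOutputStream("web_logs.csv")) {
            // Assumed option order: header=true, no alternate header, comma delimiter,
            // quote=true, escape=true, no per-FlowFile row limit.
            CsvOutputOptions options = new CsvOutputOptions(true, null, ",", true, true, 0);
            long rows = HiveJdbcCommon.convertToCsvStream(rs, out, "web_logs",
                    resultSet -> { /* per-row hook; must not advance the ResultSet cursor */ },
                    options);
            System.out.println("Wrote " + rows + " rows");
        }
    }
}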
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveOptions.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveOptions.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveOptions.java
new file mode 100644
index 0000000..ca6e6eb
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveOptions.java
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.util.hive;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+
+import java.io.Serializable;
+import java.util.List;
+
+
+public class HiveOptions implements Serializable {
+
+ protected String databaseName;
+ protected String tableName;
+ protected String metaStoreURI;
+ protected Integer idleTimeout = 60000;
+ protected Integer callTimeout = 0;
+ protected List<String> staticPartitionValues = null;
+ protected Boolean autoCreatePartitions = true;
+ protected String kerberosPrincipal;
+ protected String kerberosKeytab;
+ protected HiveConf hiveConf;
+ protected boolean streamingOptimizations = true;
+
+ public HiveOptions(String metaStoreURI, String databaseName, String tableName) {
+ this.metaStoreURI = metaStoreURI;
+ this.databaseName = databaseName;
+ this.tableName = tableName;
+ }
+
+ public HiveOptions withCallTimeout(Integer callTimeout) {
+ this.callTimeout = callTimeout;
+ return this;
+ }
+
+ public HiveOptions withStaticPartitionValues(List<String> staticPartitionValues) {
+ this.staticPartitionValues = staticPartitionValues;
+ return this;
+ }
+
+ public HiveOptions withAutoCreatePartitions(Boolean autoCreatePartitions) {
+ this.autoCreatePartitions = autoCreatePartitions;
+ return this;
+ }
+
+ public HiveOptions withKerberosKeytab(String kerberosKeytab) {
+ this.kerberosKeytab = kerberosKeytab;
+ return this;
+ }
+
+ public HiveOptions withKerberosPrincipal(String kerberosPrincipal) {
+ this.kerberosPrincipal = kerberosPrincipal;
+ return this;
+ }
+
+ public HiveOptions withHiveConf(HiveConf hiveConf) {
+ this.hiveConf = hiveConf;
+ return this;
+ }
+
+ public HiveOptions withStreamingOptimizations(boolean streamingOptimizations) {
+ this.streamingOptimizations = streamingOptimizations;
+ return this;
+ }
+
+ public String getMetaStoreURI() {
+ return metaStoreURI;
+ }
+
+ public String getDatabaseName() {
+ return databaseName;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public String getQualifiedTableName() {
+ return databaseName + "." + tableName;
+ }
+
+ public List<String> getStaticPartitionValues() {
+ return staticPartitionValues;
+ }
+
+ public Integer getCallTimeOut() {
+ return callTimeout;
+ }
+
+ public Integer getIdleTimeout() {
+ return idleTimeout;
+ }
+
+ public HiveConf getHiveConf() {
+ return hiveConf;
+ }
+
+ public boolean getStreamingOptimizations() {
+ return streamingOptimizations;
+ }
+}
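As a quick illustration of the fluent pattern above, a HiveOptions instance is configured by chaining the with*() methods. This is a sketch fragment, not a complete class; the metastore URI, table, and values are illustrative only:

import java.util.Arrays;
import org.apache.hadoop.hive.conf.HiveConf;

HiveOptions options = new HiveOptions("thrift://metastore-host:9083", "default", "web_logs")
        .withHiveConf(new HiveConf())
        .withCallTimeout(30000)                                    // milliseconds
        .withStaticPartitionValues(Arrays.asList("2018", "06"))    // e.g. year/month partition values
        .withAutoCreatePartitions(true)
        .withKerberosPrincipal("nifi@EXAMPLE.COM")
        .withKerberosKeytab("/etc/security/keytabs/nifi.keytab")
        .withStreamingOptimizations(true);
// getQualifiedTableName() combines the two name parts: "default.web_logs"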
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java
new file mode 100644
index 0000000..cb4107f
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.util.hive;
+
+
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.components.Validator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+
+public class HiveUtils {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveUtils.class);
+
+ public static final Validator GREATER_THAN_ONE_VALIDATOR = (subject, value, context) -> {
+ if (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(value)) {
+ return new ValidationResult.Builder().subject(subject).input(value).explanation("Expression Language Present").valid(true).build();
+ }
+
+ String reason = null;
+ try {
+ final int intVal = Integer.parseInt(value);
+
+ if (intVal < 2) {
+ reason = "value is less than 2";
+ }
+ } catch (final NumberFormatException e) {
+ reason = "value is not a valid integer";
+ }
+
+ return new ValidationResult.Builder().subject(subject).input(value).explanation(reason).valid(reason == null).build();
+ };
+
+ /**
+ * Validates that one or more files exist, as specified in a single property.
+ */
+ public static Validator createMultipleFilesExistValidator() {
+ return (subject, input, context) -> {
+ if (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(input)) {
+ return new ValidationResult.Builder().subject(subject).input(input).explanation("Expression Language Present").valid(true).build();
+ }
+ final String[] files = input.split("\\s*,\\s*");
+ for (String filename : files) {
+ try {
+ final File file = new File(filename.trim());
+ final boolean valid = file.exists() && file.isFile();
+ if (!valid) {
+ final String message = "File " + file + " does not exist or is not a file";
+ return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(message).build();
+ }
+ } catch (SecurityException e) {
+ final String message = "Unable to access " + filename + " due to " + e.getMessage();
+ return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(message).build();
+ }
+ }
+ return new ValidationResult.Builder().subject(subject).input(input).valid(true).build();
+ };
+ }
+}
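For context, these validators are designed to be attached to component PropertyDescriptors. A minimal sketch follows; the property names here are hypothetical, not from this commit:

import org.apache.nifi.components.PropertyDescriptor;

// Comma-separated list of files that must all exist and be regular files.
static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder()
        .name("hive-config-resources")
        .displayName("Hive Configuration Resources")
        .addValidator(HiveUtils.createMultipleFilesExistValidator())
        .required(false)
        .build();

// Integer property that must be 2 or greater; Expression Language defers validation to runtime.
static final PropertyDescriptor RECORDS_PER_TXN = new PropertyDescriptor.Builder()
        .name("records-per-transaction")
        .displayName("Records Per Transaction")
        .addValidator(HiveUtils.GREATER_THAN_ONE_VALIDATOR)
        .required(false)
        .build();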
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java
new file mode 100644
index 0000000..1014efb
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.util.hive;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * A helper class for maintaining loaded configurations (to avoid reloading on use unless necessary)
+ */
+public class ValidationResources {
+
+ private final String configResources;
+ private final Configuration configuration;
+
+ public ValidationResources(String configResources, Configuration configuration) {
+ this.configResources = configResources;
+ this.configuration = configuration;
+ }
+
+ public String getConfigResources() {
+ return configResources;
+ }
+
+ public Configuration getConfiguration() {
+ return configuration;
+ }
+}
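The holder above enables a simple check-before-reload pattern during component validation. One way a component might use it, sketched under the assumption that the surrounding field and method belong to the component itself:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

private ValidationResources validationResourceHolder;

private synchronized Configuration getConfigurationForValidation(final String configFiles) {
    // Re-parse the resource files only when the configured value has actually changed.
    if (validationResourceHolder == null
            || !configFiles.equals(validationResourceHolder.getConfigResources())) {
        final Configuration conf = new Configuration();
        for (final String file : configFiles.split(",")) {
            conf.addResource(new Path(file.trim()));
        }
        validationResourceHolder = new ValidationResources(configFiles, conf);
    }
    return validationResourceHolder.getConfiguration();
}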
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService
new file mode 100644
index 0000000..0bf68f3
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService
@@ -0,0 +1,15 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.nifi.dbcp.hive.Hive3ConnectionPool
\ No newline at end of file
[3/6] nifi git commit: NIFI-4963: Added Hive3 bundle - Incorporated
review comments - Added more defensive code for PutHive3Streaming error
handling
Posted by bb...@apache.org.
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
new file mode 100644
index 0000000..8766887
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.nifi.processors.hive.SelectHive3QL
+org.apache.nifi.processors.hive.PutHive3QL
+org.apache.nifi.processors.hive.PutHive3Streaming
+org.apache.nifi.processors.orc.PutORC
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubConnectionError.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubConnectionError.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubConnectionError.java
new file mode 100644
index 0000000..2854cae
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubConnectionError.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.streaming;
+
+public class StubConnectionError extends ConnectionError {
+ public StubConnectionError(String msg) {
+ super(msg);
+ }
+
+ public StubConnectionError(String msg, Exception innerEx) {
+ super(msg, innerEx);
+ }
+
+ public StubConnectionError(StreamingConnection endPoint, Exception innerEx) {
+ super(endPoint, innerEx);
+ }
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubSerializationError.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubSerializationError.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubSerializationError.java
new file mode 100644
index 0000000..58e65a8
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubSerializationError.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.streaming;
+
+public class StubSerializationError extends SerializationError {
+ public StubSerializationError(String msg, Exception e) {
+ super(msg, e);
+ }
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubStreamingIOFailure.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubStreamingIOFailure.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubStreamingIOFailure.java
new file mode 100644
index 0000000..ec9931c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubStreamingIOFailure.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.streaming;
+
+public class StubStreamingIOFailure extends StreamingIOFailure {
+
+ public StubStreamingIOFailure(String msg, Exception cause) {
+ super(msg, cause);
+ }
+
+ public StubStreamingIOFailure(String msg) {
+ super(msg);
+ }
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubTransactionError.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubTransactionError.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubTransactionError.java
new file mode 100644
index 0000000..0fddcb8
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/hive/streaming/StubTransactionError.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.streaming;
+
+public class StubTransactionError extends TransactionError {
+ public StubTransactionError(String msg, Exception e) {
+ super(msg, e);
+ }
+
+ public StubTransactionError(String msg) {
+ super(msg);
+ }
+}
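The Stub* classes in this test package exist because several Hive streaming exception constructors are package-protected; placing subclasses in org.apache.hive.streaming lets the NiFi tests instantiate them. A typical Mockito use might look like this sketch (commitTransaction() as the mocked method is an assumption about the Hive 3 StreamingConnection API):

import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;

StreamingConnection connection = mock(StreamingConnection.class);
// Force the commit path to fail so PutHive3Streaming's error handling can be exercised.
doThrow(new StubTransactionError("simulated transaction failure"))
        .when(connection).commitTransaction();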
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive3ConnectionPoolTest.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive3ConnectionPoolTest.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive3ConnectionPoolTest.java
new file mode 100644
index 0000000..5d9f87c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive3ConnectionPoolTest.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.dbcp.hive;
+
+import org.apache.commons.dbcp.BasicDataSource;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.controller.AbstractControllerService;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.registry.VariableDescriptor;
+import org.apache.nifi.util.MockConfigurationContext;
+import org.apache.nifi.util.MockVariableRegistry;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.security.PrivilegedExceptionAction;
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Matchers.isA;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class Hive3ConnectionPoolTest {
+ private UserGroupInformation userGroupInformation;
+ private Hive3ConnectionPool hive3ConnectionPool;
+ private BasicDataSource basicDataSource;
+ private ComponentLog componentLog;
+
+ @Before
+ public void setup() throws Exception {
+ userGroupInformation = mock(UserGroupInformation.class);
+ basicDataSource = mock(BasicDataSource.class);
+ componentLog = mock(ComponentLog.class);
+
+ when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> {
+ try {
+ return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run();
+ } catch (IOException | Error | RuntimeException | InterruptedException e) {
+ throw e;
+ } catch (Throwable e) {
+ throw new UndeclaredThrowableException(e);
+ }
+ });
+ initPool();
+ }
+
+ private void initPool() throws Exception {
+ hive3ConnectionPool = new Hive3ConnectionPool();
+
+ Field ugiField = Hive3ConnectionPool.class.getDeclaredField("ugi");
+ ugiField.setAccessible(true);
+ ugiField.set(hive3ConnectionPool, userGroupInformation);
+
+ Field dataSourceField = Hive3ConnectionPool.class.getDeclaredField("dataSource");
+ dataSourceField.setAccessible(true);
+ dataSourceField.set(hive3ConnectionPool, basicDataSource);
+
+ Field componentLogField = AbstractControllerService.class.getDeclaredField("logger");
+ componentLogField.setAccessible(true);
+ componentLogField.set(hive3ConnectionPool, componentLog);
+ }
+
+ @Test(expected = ProcessException.class)
+ public void testGetConnectionSqlException() throws SQLException {
+ SQLException sqlException = new SQLException("bad sql");
+ when(basicDataSource.getConnection()).thenThrow(sqlException);
+ try {
+ hive3ConnectionPool.getConnection();
+ } catch (ProcessException e) {
+ assertEquals(sqlException, e.getCause());
+ throw e;
+ }
+ }
+
+ @Test
+ public void testExpressionLanguageSupport() throws Exception {
+ final String URL = "jdbc:hive2://localhost:10000/default";
+ final String USER = "user";
+ final String PASS = "pass";
+ final int MAX_CONN = 7;
+ final String MAX_WAIT = "10 sec"; // 10000 milliseconds
+ final String CONF = "/path/to/hive-site.xml";
+ hive3ConnectionPool = new Hive3ConnectionPool();
+
+ Map<PropertyDescriptor, String> props = new HashMap<PropertyDescriptor, String>() {{
+ put(Hive3ConnectionPool.DATABASE_URL, "${url}");
+ put(Hive3ConnectionPool.DB_USER, "${username}");
+ put(Hive3ConnectionPool.DB_PASSWORD, "${password}");
+ put(Hive3ConnectionPool.MAX_TOTAL_CONNECTIONS, "${maxconn}");
+ put(Hive3ConnectionPool.MAX_WAIT_TIME, "${maxwait}");
+ put(Hive3ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${hiveconf}");
+ }};
+
+ MockVariableRegistry registry = new MockVariableRegistry();
+ registry.setVariable(new VariableDescriptor("url"), URL);
+ registry.setVariable(new VariableDescriptor("username"), USER);
+ registry.setVariable(new VariableDescriptor("password"), PASS);
+ registry.setVariable(new VariableDescriptor("maxconn"), Integer.toString(MAX_CONN));
+ registry.setVariable(new VariableDescriptor("maxwait"), MAX_WAIT);
+ registry.setVariable(new VariableDescriptor("hiveconf"), CONF);
+
+
+ MockConfigurationContext context = new MockConfigurationContext(props, null, registry);
+ hive3ConnectionPool.onConfigured(context);
+
+ Field dataSourceField = Hive3ConnectionPool.class.getDeclaredField("dataSource");
+ dataSourceField.setAccessible(true);
+ basicDataSource = (BasicDataSource) dataSourceField.get(hive3ConnectionPool);
+ assertEquals(URL, basicDataSource.getUrl());
+ assertEquals(USER, basicDataSource.getUsername());
+ assertEquals(PASS, basicDataSource.getPassword());
+ assertEquals(MAX_CONN, basicDataSource.getMaxActive());
+ assertEquals(10000L, basicDataSource.getMaxWait());
+ assertEquals(URL, hive3ConnectionPool.getConnectionURL());
+ }
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestHive3Parser.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestHive3Parser.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestHive3Parser.java
new file mode 100644
index 0000000..63c893d
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestHive3Parser.java
@@ -0,0 +1,292 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.hive;
+
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSessionFactory;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.util.MockProcessContext;
+import org.apache.nifi.util.MockProcessorInitializationContext;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestHive3Parser extends AbstractHive3QLProcessor {
+
+ @Before
+ public void initialize() {
+ final MockProcessContext processContext = new MockProcessContext(this);
+ final ProcessorInitializationContext initializationContext = new MockProcessorInitializationContext(this, processContext);
+ initialize(initializationContext);
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
+
+ }
+
+ @Test
+ public void parseSelect() {
+ String query = "select a.empid, to_something(b.saraly) from " +
+ "company.emp a inner join default.salary b where a.empid = b.empid";
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(2, tableNames.size());
+ assertTrue(tableNames.contains(new TableName("company", "emp", true)));
+ assertTrue(tableNames.contains(new TableName("default", "salary", true)));
+ }
+
+ @Test
+ public void parseSelectPrepared() {
+ String query = "select empid from company.emp a where a.firstName = ?";
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(1, tableNames.size());
+ assertTrue(tableNames.contains(new TableName("company", "emp", true)));
+ }
+
+
+ @Test
+ public void parseLongSelect() {
+ String query = "select\n" +
+ "\n" +
+ " i_item_id,\n" +
+ "\n" +
+ " i_item_desc,\n" +
+ "\n" +
+ " s_state,\n" +
+ "\n" +
+ " count(ss_quantity) as store_sales_quantitycount,\n" +
+ "\n" +
+ " avg(ss_quantity) as store_sales_quantityave,\n" +
+ "\n" +
+ " stddev_samp(ss_quantity) as store_sales_quantitystdev,\n" +
+ "\n" +
+ " stddev_samp(ss_quantity) / avg(ss_quantity) as store_sales_quantitycov,\n" +
+ "\n" +
+ " count(sr_return_quantity) as store_returns_quantitycount,\n" +
+ "\n" +
+ " avg(sr_return_quantity) as store_returns_quantityave,\n" +
+ "\n" +
+ " stddev_samp(sr_return_quantity) as store_returns_quantitystdev,\n" +
+ "\n" +
+ " stddev_samp(sr_return_quantity) / avg(sr_return_quantity) as store_returns_quantitycov,\n" +
+ "\n" +
+ " count(cs_quantity) as catalog_sales_quantitycount,\n" +
+ "\n" +
+ " avg(cs_quantity) as catalog_sales_quantityave,\n" +
+ "\n" +
+ " stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitystdev,\n" +
+ "\n" +
+ " stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitycov\n" +
+ "\n" +
+ "from\n" +
+ "\n" +
+ " store_sales,\n" +
+ "\n" +
+ " store_returns,\n" +
+ "\n" +
+ " catalog_sales,\n" +
+ "\n" +
+ " date_dim d1,\n" +
+ "\n" +
+ " date_dim d2,\n" +
+ "\n" +
+ " date_dim d3,\n" +
+ "\n" +
+ " store,\n" +
+ "\n" +
+ " item\n" +
+ "\n" +
+ "where\n" +
+ "\n" +
+ " d1.d_quarter_name = '2000Q1'\n" +
+ "\n" +
+ " and d1.d_date_sk = ss_sold_date_sk\n" +
+ "\n" +
+ " and i_item_sk = ss_item_sk\n" +
+ "\n" +
+ " and s_store_sk = ss_store_sk\n" +
+ "\n" +
+ " and ss_customer_sk = sr_customer_sk\n" +
+ "\n" +
+ " and ss_item_sk = sr_item_sk\n" +
+ "\n" +
+ " and ss_ticket_number = sr_ticket_number\n" +
+ "\n" +
+ " and sr_returned_date_sk = d2.d_date_sk\n" +
+ "\n" +
+ " and d2.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" +
+ "\n" +
+ " and sr_customer_sk = cs_bill_customer_sk\n" +
+ "\n" +
+ " and sr_item_sk = cs_item_sk\n" +
+ "\n" +
+ " and cs_sold_date_sk = d3.d_date_sk\n" +
+ "\n" +
+ " and d3.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" +
+ "\n" +
+ "group by i_item_id , i_item_desc , s_state\n" +
+ "\n" +
+ "order by i_item_id , i_item_desc , s_state\n" +
+ "\n" +
+ "limit 100";
+
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(6, tableNames.size());
+ AtomicInteger cnt = new AtomicInteger(0);
+ for (TableName tableName : tableNames) {
+ if (tableName.equals(new TableName(null, "store_sales", true))) {
+ cnt.incrementAndGet();
+ } else if (tableName.equals(new TableName(null, "store_returns", true))) {
+ cnt.incrementAndGet();
+ } else if (tableName.equals(new TableName(null, "catalog_sales", true))) {
+ cnt.incrementAndGet();
+ } else if (tableName.equals(new TableName(null, "date_dim", true))) {
+ cnt.incrementAndGet();
+ } else if (tableName.equals(new TableName(null, "store", true))) {
+ cnt.incrementAndGet();
+ } else if (tableName.equals(new TableName(null, "item", true))) {
+ cnt.incrementAndGet();
+ }
+ }
+ assertEquals(6, cnt.get());
+ }
+
+ @Test
+ public void parseSelectInsert() {
+ String query = "insert into databaseA.tableA select key, max(value) from databaseA.tableA where category = 'x'";
+
+ // The same database.tableName can appear two times for input and output.
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(2, tableNames.size());
+ AtomicInteger cnt = new AtomicInteger(0);
+ tableNames.forEach(tableName -> {
+ if (tableName.equals(new TableName("databaseA", "tableA", false))) {
+ cnt.incrementAndGet();
+ } else if (tableName.equals(new TableName("databaseA", "tableA", true))) {
+ cnt.incrementAndGet();
+ }
+ });
+ assertEquals(2, cnt.get());
+ }
+
+ @Test
+ public void parseInsert() {
+ String query = "insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id";
+
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(3, tableNames.size());
+ AtomicInteger cnt = new AtomicInteger(0);
+ tableNames.forEach(tableName -> {
+ if (tableName.equals(new TableName("databaseB", "tableB1", false))) {
+ cnt.incrementAndGet();
+ } else if (tableName.equals(new TableName(null, "tableA1", true))) {
+ cnt.incrementAndGet();
+ } else if (tableName.equals(new TableName(null, "tableA2", true))) {
+ cnt.incrementAndGet();
+ }
+ });
+ assertEquals(3, cnt.get());
+ }
+
+ @Test
+ public void parseUpdate() {
+ String query = "update table_a set y = 'updated' where x > 100";
+
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(1, tableNames.size());
+ assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
+ }
+
+ @Test
+ public void parseDelete() {
+ String query = "delete from table_a where x > 100";
+
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(1, tableNames.size());
+ assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
+ }
+
+ @Test
+ public void parseDDL() {
+ String query = "CREATE TABLE IF NOT EXISTS EMPLOYEES(\n" +
+ "EmployeeID INT,FirstName STRING, Title STRING,\n" +
+ "State STRING, Laptop STRING)\n" +
+ "COMMENT 'Employee Names'\n" +
+ "STORED AS ORC";
+
+
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(1, tableNames.size());
+ assertTrue(tableNames.contains(new TableName(null, "EMPLOYEES", false)));
+ }
+
+ @Test
+ public void parseSetProperty() {
+ String query = " set 'hive.exec.dynamic.partition.mode'=nonstrict";
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(0, tableNames.size());
+ }
+
+ @Test
+ public void parseSetRole() {
+ String query = "set role all";
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(0, tableNames.size());
+ }
+
+ @Test
+ public void parseShowRoles() {
+ String query = "show roles";
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(0, tableNames.size());
+ }
+
+ @Test
+ public void parseMsck() {
+ String query = "msck repair table table_a";
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(1, tableNames.size());
+ assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
+ }
+
+ @Test
+ public void parseAddJar() {
+ String query = "ADD JAR hdfs:///tmp/my_jar.jar";
+ final Set<TableName> tableNames = findTableNames(query);
+ System.out.printf("tableNames=%s\n", tableNames);
+ assertEquals(0, tableNames.size());
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive3QL.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive3QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive3QL.java
new file mode 100644
index 0000000..99b0b7d
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive3QL.java
@@ -0,0 +1,792 @@
+package org.apache.nifi.processors.hive;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.nifi.controller.AbstractControllerService;
+import org.apache.nifi.dbcp.DBCPService;
+import org.apache.nifi.dbcp.hive.Hive3DBCPService;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
+import org.apache.nifi.reporting.InitializationException;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.mockito.Mockito;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+public class TestPutHive3QL {
+ private static final String createPersons = "CREATE TABLE PERSONS (id integer primary key, name varchar(100), code integer)";
+ private static final String createPersonsAutoId = "CREATE TABLE PERSONS (id INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1), name VARCHAR(100), code INTEGER check(code <= 100))";
+
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder();
+
+ @BeforeClass
+ public static void setup() {
+ System.setProperty("derby.stream.error.file", "target/derby.log");
+ }
+
+ @Test
+ public void testDirectStatements() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersons);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', 84)".getBytes());
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_SUCCESS, 1);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("Mark", rs.getString(2));
+ assertEquals(84, rs.getInt(3));
+ assertFalse(rs.next());
+ }
+ }
+
+ runner.enqueue("UPDATE PERSONS SET NAME='George' WHERE ID=1".getBytes());
+ runner.run();
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("George", rs.getString(2));
+ assertEquals(84, rs.getInt(3));
+ assertFalse(rs.next());
+ }
+ }
+ }
+
+ @Test
+ public void testFailInMiddleWithBadStatement() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersonsAutoId);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes());
+ runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
+ runner.run();
+
+ runner.assertTransferCount(PutHive3QL.REL_FAILURE, 1);
+ runner.assertTransferCount(PutHive3QL.REL_SUCCESS, 3);
+ runner.getFlowFilesForRelationship(PutHive3QL.REL_SUCCESS)
+ .forEach(f -> f.assertAttributeEquals(PutHive3QL.ATTR_OUTPUT_TABLES, "PERSONS"));
+ }
+
+ @Test
+ public void testFailInMiddleWithBadStatementRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+ runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersonsAutoId);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes());
+ runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
+ runner.run();
+
+ // The 1st one should be routed to success, others should stay in queue.
+ assertEquals(3, runner.getQueueSize().getObjectCount());
+ runner.assertTransferCount(PutHive3QL.REL_FAILURE, 0);
+ runner.assertTransferCount(PutHive3QL.REL_SUCCESS, 1);
+ }
+
+ @Test
+ public void testFailAtBeginning() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersonsAutoId);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
+ runner.run();
+
+ runner.assertTransferCount(PutHive3QL.REL_FAILURE, 1);
+ runner.assertTransferCount(PutHive3QL.REL_SUCCESS, 2);
+ }
+
+ @Test
+ public void testFailAtBeginningRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+ runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersonsAutoId);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
+ runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
+ try {
+ runner.run();
+ fail("ProcessException should be thrown");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+
+ assertEquals(3, runner.getQueueSize().getObjectCount());
+ runner.assertTransferCount(PutHive3QL.REL_FAILURE, 0);
+ runner.assertTransferCount(PutHive3QL.REL_SUCCESS, 0);
+ }
+
+ @Test
+ public void testFailInMiddleWithBadParameterType() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersonsAutoId);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+
+ final Map<String, String> goodAttributes = new HashMap<>();
+ goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ goodAttributes.put("hiveql.args.1.value", "84");
+
+ final Map<String, String> badAttributes = new HashMap<>();
+ badAttributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR));
+ badAttributes.put("hiveql.args.1.value", "hello");
+
+ final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
+ runner.enqueue(data, goodAttributes);
+ runner.enqueue(data, badAttributes);
+ runner.enqueue(data, goodAttributes);
+ runner.enqueue(data, goodAttributes);
+ runner.run();
+
+ runner.assertTransferCount(PutHive3QL.REL_FAILURE, 1);
+ runner.assertTransferCount(PutHive3QL.REL_SUCCESS, 3);
+ }
+
+
+ @Test
+ public void testFailInMiddleWithBadParameterValue() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersonsAutoId);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+
+ final Map<String, String> goodAttributes = new HashMap<>();
+ goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ goodAttributes.put("hiveql.args.1.value", "84");
+
+ final Map<String, String> badAttributes = new HashMap<>();
+ badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ badAttributes.put("hiveql.args.1.value", "101"); // Constraint violation, up to 100
+
+ final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
+ runner.enqueue(data, goodAttributes);
+ runner.enqueue(data, badAttributes);
+ runner.enqueue(data, goodAttributes);
+ runner.enqueue(data, goodAttributes);
+ runner.run();
+
+ runner.assertTransferCount(PutHive3QL.REL_SUCCESS, 3);
+ runner.assertTransferCount(PutHive3QL.REL_FAILURE, 1);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("Mark", rs.getString(2));
+ assertEquals(84, rs.getInt(3));
+ assertTrue(rs.next());
+ assertTrue(rs.next());
+ assertFalse(rs.next());
+ }
+ }
+ }
+
+ @Test
+ public void testFailInMiddleWithBadNumberFormat() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersonsAutoId);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+
+ final Map<String, String> goodAttributes = new HashMap<>();
+ goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ goodAttributes.put("hiveql.args.1.value", "84");
+
+ final Map<String, String> badAttributes = new HashMap<>();
+ badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ badAttributes.put("hiveql.args.1.value", "NOT_NUMBER");
+
+ final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
+ runner.enqueue(data, goodAttributes);
+ runner.enqueue(data, badAttributes);
+ runner.enqueue(data, goodAttributes);
+ runner.enqueue(data, goodAttributes);
+ runner.run();
+
+ runner.assertTransferCount(PutHive3QL.REL_SUCCESS, 3);
+ runner.assertTransferCount(PutHive3QL.REL_FAILURE, 1);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("Mark", rs.getString(2));
+ assertEquals(84, rs.getInt(3));
+ assertTrue(rs.next());
+ assertTrue(rs.next());
+ assertFalse(rs.next());
+ }
+ }
+ }
+
+
+ @Test
+ public void testUsingSqlDataTypesWithNegativeValues() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate("CREATE TABLE PERSONS (id integer primary key, name varchar(100), code bigint)");
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.type", "-5");
+ attributes.put("hiveql.args.1.value", "84");
+ runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', ?)".getBytes(), attributes);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_SUCCESS, 1);
+ runner.getFlowFilesForRelationship(PutHive3QL.REL_SUCCESS).get(0).assertAttributeEquals(PutHive3QL.ATTR_OUTPUT_TABLES, "PERSONS");
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("Mark", rs.getString(2));
+ assertEquals(84, rs.getInt(3));
+ assertFalse(rs.next());
+ }
+ }
+ }
+
+ @Test
+ public void testStatementsWithPreparedParameters() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersons);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.1.value", "1");
+
+ attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
+ attributes.put("hiveql.args.2.value", "Mark");
+
+ attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.3.value", "84");
+
+ runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_SUCCESS, 1);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("Mark", rs.getString(2));
+ assertEquals(84, rs.getInt(3));
+ assertFalse(rs.next());
+ }
+ }
+
+ runner.clearTransferState();
+
+ attributes.clear();
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR));
+ attributes.put("hiveql.args.1.value", "George");
+
+ attributes.put("hiveql.args.2.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.2.value", "1");
+
+ runner.enqueue("UPDATE PERSONS SET NAME=? WHERE ID=?".getBytes(), attributes);
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_SUCCESS, 1);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("George", rs.getString(2));
+ assertEquals(84, rs.getInt(3));
+ assertFalse(rs.next());
+ }
+ }
+ }
+
+
+ @Test
+ public void testMultipleStatementsWithinFlowFile() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersons);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+
+ final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
+ "UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.1.value", "1");
+
+ attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
+ attributes.put("hiveql.args.2.value", "Mark");
+
+ attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.3.value", "84");
+
+ attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.4.value", "1");
+
+ runner.enqueue(sql.getBytes(), attributes);
+ runner.run();
+
+ // Multiple statements separated by semicolons should all succeed
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_SUCCESS, 1);
+ runner.getFlowFilesForRelationship(PutHive3QL.REL_SUCCESS)
+ .forEach(f -> f.assertAttributeEquals(PutHive3QL.ATTR_OUTPUT_TABLES, "PERSONS"));
+
+ // Now we can check that the values were inserted by the multi-statement script.
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals("Record ID mismatch", 1, rs.getInt(1));
+ assertEquals("Record NAME mismatch", "George", rs.getString(2));
+ }
+ }
+ }
+
+ @Test
+ public void testMultipleStatementsWithinFlowFilePlusEmbeddedDelimiter() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersons);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+
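+ // An escaped semicolon ("\;") is not treated as a statement delimiter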
+ final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
+ "UPDATE PERSONS SET NAME='George\\;' WHERE ID=?; ";
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.1.value", "1");
+
+ attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
+ attributes.put("hiveql.args.2.value", "Mark");
+
+ attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.3.value", "84");
+
+ attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.4.value", "1");
+
+ runner.enqueue(sql.getBytes(), attributes);
+ runner.run();
+
+ // Multiple statements (with an escaped semicolon inside a literal) should all succeed
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_SUCCESS, 1);
+
+ // Now we can check that the values were inserted by the multi-statement script.
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals("Record ID mismatch", 1, rs.getInt(1));
+ assertEquals("Record NAME mismatch", "George\\;", rs.getString(2));
+ }
+ }
+ }
+
+
+ @Test
+ public void testWithNullParameter() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersons);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.1.value", "1");
+
+ attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
+ attributes.put("hiveql.args.2.value", "Mark");
+
+ attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
+
+ runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_SUCCESS, 1);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("Mark", rs.getString(2));
+ assertEquals(0, rs.getInt(3));
+ assertFalse(rs.next());
+ }
+ }
+ }
+
+ @Test
+ public void testInvalidStatement() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final File tempDir = folder.getRoot();
+ final File dbDir = new File(tempDir, "db");
+ final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(createPersons);
+ }
+ }
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+
+ final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
+ "UPDATE SOME_RANDOM_TABLE NAME='George' WHERE ID=?; ";
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.1.value", "1");
+
+ attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
+ attributes.put("hiveql.args.2.value", "Mark");
+
+ attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.3.value", "84");
+
+ attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.4.value", "1");
+
+ runner.enqueue(sql.getBytes(), attributes);
+ runner.run();
+
+ // should fail because the table in the UPDATE statement is invalid
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_FAILURE, 1);
+
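+ // The first INSERT executed before the invalid UPDATE failed, so PERSONS should still contain the row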
+ try (final Connection conn = service.getConnection()) {
+ try (final Statement stmt = conn.createStatement()) {
+ final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+ assertTrue(rs.next());
+ }
+ }
+ }
+
+
+ @Test
+ public void testRetryableFailure() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final DBCPService service = new SQLExceptionService(null);
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+
+ final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
+ "UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
+
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.1.value", "1");
+
+ attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
+ attributes.put("hiveql.args.2.value", "Mark");
+
+ attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.3.value", "84");
+
+ attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.4.value", "1");
+
+ runner.enqueue(sql.getBytes(), attributes);
+ runner.run();
+
+ // should fail because there isn't a valid connection and tables don't exist.
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_RETRY, 1);
+ }
+
+ @Test
+ public void testRetryableFailureRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException {
+ final TestRunner runner = TestRunners.newTestRunner(PutHive3QL.class);
+ final DBCPService service = new SQLExceptionService(null);
+ runner.addControllerService("dbcp", service);
+ runner.enableControllerService(service);
+
+ runner.setProperty(PutHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
+
+ final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
+ "UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
+
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.1.value", "1");
+
+ attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
+ attributes.put("hiveql.args.2.value", "Mark");
+
+ attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.3.value", "84");
+
+ attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
+ attributes.put("hiveql.args.4.value", "1");
+
+ runner.enqueue(sql.getBytes(), attributes);
+ try {
+ runner.run();
+ fail("Should throw ProcessException");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+
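+ // With Rollback On Failure the session is rolled back, so the flow file stays in the input queue instead of being routed to retry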
+ assertEquals(1, runner.getQueueSize().getObjectCount());
+ runner.assertAllFlowFilesTransferred(PutHive3QL.REL_RETRY, 0);
+ }
+
+ /**
+ * Simple implementation only for testing purposes
+ */
+ private static class MockDBCPService extends AbstractControllerService implements Hive3DBCPService {
+ private final String dbLocation;
+
+ MockDBCPService(final String dbLocation) {
+ this.dbLocation = dbLocation;
+ }
+
+ @Override
+ public String getIdentifier() {
+ return "dbcp";
+ }
+
+ @Override
+ public Connection getConnection() throws ProcessException {
+ try {
+ Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
+ return DriverManager.getConnection("jdbc:derby:" + dbLocation + ";create=true");
+ } catch (final Exception e) {
+ e.printStackTrace();
+ throw new ProcessException("getConnection failed: " + e);
+ }
+ }
+
+ @Override
+ public String getConnectionURL() {
+ return "jdbc:derby:" + dbLocation + ";create=true";
+ }
+ }
+
+ /**
+ * Simple implementation only for testing purposes
+ */
+ private static class SQLExceptionService extends AbstractControllerService implements Hive3DBCPService {
+ private final Hive3DBCPService service;
+ private int allowedBeforeFailure = 0;
+ private int successful = 0;
+
+ SQLExceptionService(final Hive3DBCPService service) {
+ this.service = service;
+ }
+
+ @Override
+ public String getIdentifier() {
+ return "dbcp";
+ }
+
+ @Override
+ public Connection getConnection() throws ProcessException {
+ try {
+ if (++successful > allowedBeforeFailure) {
+ final Connection conn = Mockito.mock(Connection.class);
+ Mockito.when(conn.prepareStatement(Mockito.any(String.class))).thenThrow(new SQLException("Unit Test Generated SQLException"));
+ return conn;
+ } else {
+ return service.getConnection();
+ }
+ } catch (final Exception e) {
+ e.printStackTrace();
+ throw new ProcessException("getConnection failed: " + e);
+ }
+ }
+
+ @Override
+ public String getConnectionURL() {
+ return service != null ? service.getConnectionURL() : null;
+ }
+ }
+}
[6/6] nifi git commit: NIFI-4963: Added Hive3 bundle - Incorporated review comments - Added more defensive code for PutHive3Streaming error handling
Posted by bb...@apache.org.
NIFI-4963: Added Hive3 bundle
- Incorporated review comments
- Added more defensive code for PutHive3Streaming error handling
This closes #2755.
Signed-off-by: Bryan Bende <bb...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/da99f873
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/da99f873
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/da99f873
Branch: refs/heads/master
Commit: da99f873a7d2f636465efd86178e578da75674a0
Parents: 8feac9a
Author: Matthew Burgess <ma...@apache.org>
Authored: Mon Jun 4 11:37:48 2018 -0400
Committer: Bryan Bende <bb...@apache.org>
Committed: Wed Jun 13 14:32:58 2018 -0400
----------------------------------------------------------------------
.travis.yml | 2 +-
nifi-assembly/pom.xml | 23 +
.../nifi-hive-bundle/nifi-hive-nar/pom.xml | 2 +
.../nifi-hive-processors/pom.xml | 7 +-
.../nifi-hive-services-api-nar/pom.xml | 2 +
.../nifi-hive-services-api/pom.xml | 6 +
.../apache/nifi/dbcp/hive/Hive3DBCPService.java | 30 +
.../nifi-hive-bundle/nifi-hive3-nar/pom.xml | 49 ++
.../src/main/resources/META-INF/NOTICE | 349 ++++++++
.../nifi-hive3-processors/pom.xml | 140 +++
.../hadoop/hive/ql/io/orc/NiFiOrcUtils.java | 533 +++++++++++
.../apache/hive/streaming/HiveRecordWriter.java | 106 +++
.../apache/hive/streaming/NiFiRecordSerDe.java | 282 ++++++
.../nifi/dbcp/hive/Hive3ConnectionPool.java | 385 ++++++++
.../hive/AbstractHive3QLProcessor.java | 348 ++++++++
.../apache/nifi/processors/hive/PutHive3QL.java | 280 ++++++
.../nifi/processors/hive/PutHive3Streaming.java | 560 ++++++++++++
.../nifi/processors/hive/SelectHive3QL.java | 477 ++++++++++
.../org/apache/nifi/processors/orc/PutORC.java | 175 ++++
.../orc/record/ORCHDFSRecordWriter.java | 110 +++
.../hive/AuthenticationFailedException.java | 23 +
.../apache/nifi/util/hive/CsvOutputOptions.java | 63 ++
.../apache/nifi/util/hive/HiveConfigurator.java | 119 +++
.../apache/nifi/util/hive/HiveJdbcCommon.java | 450 ++++++++++
.../org/apache/nifi/util/hive/HiveOptions.java | 117 +++
.../org/apache/nifi/util/hive/HiveUtils.java | 76 ++
.../nifi/util/hive/ValidationResources.java | 41 +
...org.apache.nifi.controller.ControllerService | 15 +
.../org.apache.nifi.processor.Processor | 18 +
.../hive/streaming/StubConnectionError.java | 31 +
.../hive/streaming/StubSerializationError.java | 23 +
.../hive/streaming/StubStreamingIOFailure.java | 28 +
.../hive/streaming/StubTransactionError.java | 27 +
.../nifi/dbcp/hive/Hive3ConnectionPoolTest.java | 138 +++
.../nifi/processors/hive/TestHive3Parser.java | 292 ++++++
.../nifi/processors/hive/TestPutHive3QL.java | 792 +++++++++++++++++
.../processors/hive/TestPutHive3Streaming.java | 878 +++++++++++++++++++
.../nifi/processors/hive/TestSelectHive3QL.java | 539 ++++++++++++
.../apache/nifi/processors/orc/PutORCTest.java | 416 +++++++++
.../apache/nifi/util/orc/TestNiFiOrcUtils.java | 437 +++++++++
.../src/test/resources/array_of_records.avsc | 38 +
.../src/test/resources/core-site-security.xml | 30 +
.../src/test/resources/core-site.xml | 22 +
.../src/test/resources/fake.keytab | 0
.../src/test/resources/hive-site-security.xml | 26 +
.../src/test/resources/hive-site.xml | 22 +
.../src/test/resources/krb5.conf | 0
.../src/test/resources/user.avsc | 26 +
.../src/test/resources/user_logical_types.avsc | 27 +
nifi-nar-bundles/nifi-hive-bundle/pom.xml | 59 +-
50 files changed, 8587 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index d6c9b39..05351b5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -55,4 +55,4 @@ script:
# Note: The reason the sed is done as part of script is to ensure the pom hack
# won't affect the 'clean install' above
- bash .travis.sh
- - mvn -T 2 clean install -Pcontrib-check,include-grpc,include-atlas -Ddir-only | grep -v -F -f .travis-output-filters && exit ${PIPESTATUS[0]}
+ - mvn -T 2 clean install -Pcontrib-check,include-grpc,include-atlas,include-hive3 -Ddir-only | grep -v -F -f .travis-output-filters && exit ${PIPESTATUS[0]}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-assembly/pom.xml b/nifi-assembly/pom.xml
index e610aa0..3f473c8 100755
--- a/nifi-assembly/pom.xml
+++ b/nifi-assembly/pom.xml
@@ -549,6 +549,12 @@ language governing permissions and limitations under the License. -->
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hive3-nar</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ <type>nar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
<artifactId>nifi-site-to-site-reporting-nar</artifactId>
<version>1.7.0-SNAPSHOT</version>
<type>nar</type>
@@ -746,6 +752,23 @@ language governing permissions and limitations under the License. -->
</dependencies>
</profile>
<profile>
+ <id>include-hive3</id>
+ <!-- This profile handles the inclusion of Hive 3 artifacts. The NAR
+ is quite large and makes the resultant binary distribution significantly
+ larger (275+ MB). -->
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hive3-nar</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ <type>nar</type>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
<id>rpm</id>
<activation>
<activeByDefault>false</activeByDefault>
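Because the profile is not active by default, a distribution that bundles the Hive 3 NAR must enable it explicitly, mirroring the .travis.yml change above, for example:

    mvn clean install -Pinclude-hive3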
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/pom.xml
index 41e0159..cb2d60d 100644
--- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/pom.xml
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/pom.xml
@@ -28,6 +28,8 @@
<properties>
<maven.javadoc.skip>true</maven.javadoc.skip>
<source.skip>true</source.skip>
+ <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
+ <hadoop.version>${hive.hadoop.version}</hadoop.version>
</properties>
<dependencies>
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/pom.xml
index f7b7b0b..4a6be6d 100644
--- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/pom.xml
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/pom.xml
@@ -24,13 +24,17 @@
<artifactId>nifi-hive-processors</artifactId>
<packaging>jar</packaging>
+
<properties>
- <hive.version>1.2.1</hive.version>
+ <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
+ <hadoop.version>${hive.hadoop.version}</hadoop.version>
</properties>
+
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-api</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
@@ -84,6 +88,7 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>com.google.code.findbugs</groupId>
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api-nar/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api-nar/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api-nar/pom.xml
index 1060225..b0b9a4c 100644
--- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api-nar/pom.xml
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api-nar/pom.xml
@@ -28,6 +28,8 @@
<properties>
<maven.javadoc.skip>true</maven.javadoc.skip>
<source.skip>true</source.skip>
+ <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
+ <hadoop.version>${hive.hadoop.version}</hadoop.version>
</properties>
<dependencies>
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/pom.xml
index 6d85c38..2db9b34 100644
--- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/pom.xml
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/pom.xml
@@ -25,10 +25,16 @@
<artifactId>nifi-hive-services-api</artifactId>
<packaging>jar</packaging>
+ <properties>
+ <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
+ <hadoop.version>${hive.hadoop.version}</hadoop.version>
+ </properties>
+
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-api</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/src/main/java/org/apache/nifi/dbcp/hive/Hive3DBCPService.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/src/main/java/org/apache/nifi/dbcp/hive/Hive3DBCPService.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/src/main/java/org/apache/nifi/dbcp/hive/Hive3DBCPService.java
new file mode 100644
index 0000000..e3af3aa
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/src/main/java/org/apache/nifi/dbcp/hive/Hive3DBCPService.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.dbcp.hive;
+
+
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+
+/**
+ * Definition of a Database Connection Pooling Service for Apache Hive 3.
+ *
+ */
+@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"})
+@CapabilityDescription("Provides a Database Connection Pooling Service for Apache Hive. Connections can be requested from the pool and returned after use.")
+public interface Hive3DBCPService extends HiveDBCPService {
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/pom.xml
new file mode 100644
index 0000000..41286d5
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/pom.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hive-bundle</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>nifi-hive3-nar</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ <packaging>nar</packaging>
+ <properties>
+ <maven.javadoc.skip>true</maven.javadoc.skip>
+ <source.skip>true</source.skip>
+ <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
+ <hadoop.version>${hive3.hadoop.version}</hadoop.version>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hive-services-api-nar</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ <type>nar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hive3-processors</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
+
+</project>
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/src/main/resources/META-INF/NOTICE
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/src/main/resources/META-INF/NOTICE b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/src/main/resources/META-INF/NOTICE
new file mode 100644
index 0000000..9da3e38
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/src/main/resources/META-INF/NOTICE
@@ -0,0 +1,349 @@
+nifi-hive3-nar
+Copyright 2014-2017 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+This includes derived works from the Apache Storm (ASLv2 licensed) project (https://github.com/apache/storm):
+ Copyright 2015 The Apache Software Foundation
+ The derived work is adapted from
+ org/apache/storm/hive/common/HiveWriter.java
+ org/apache/storm/hive/common/HiveOptions.java
+ and can be found in the org.apache.nifi.util.hive package
+
+This includes derived works from the Apache Hive (ASLv2 licensed) project (https://github.com/apache/hive):
+ Copyright 2008-2016 The Apache Software Foundation
+ The derived work is adapted from
+ release-1.2.1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+ and can be found in the org.apache.hadoop.hive.ql.io.orc package
+ The derived work is adapted from
+ branch-3.0/streaming/src/java/org/apache/hive/streaming/StrictJsonWriter.java
+ and can be found in the org.apache.hive.streaming.HiveRecordWriter class
+ The derived work is adapted from
+ branch-3.0/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java
+ and can be found in the org.apache.hive.streaming.NiFiRecordSerDe class
+
+===========================================
+Apache Software License v2
+===========================================
+
+The following binary components are provided under the Apache Software License v2
+
+ (ASLv2) Apache Ant
+ The following NOTICE information applies:
+ Apache Ant
+ Copyright 1999-2016 The Apache Software Foundation
+
+ (ASLv2) Apache Commons Codec
+ The following NOTICE information applies:
+ Apache Commons Codec
+ Copyright 2002-2014 The Apache Software Foundation
+
+ src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
+ contains test data from http://aspell.net/test/orig/batch0.tab.
+ Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)
+
+ ===============================================================================
+
+ The content of package org.apache.commons.codec.language.bm has been translated
+ from the original php source code available at http://stevemorse.org/phoneticinfo.htm
+ with permission from the original authors.
+ Original source copyright:
+ Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
+
+ (ASLv2) Apache Commons DBCP
+ The following NOTICE information applies:
+ Apache Commons DBCP
+ Copyright 2001-2015 The Apache Software Foundation.
+
+ (ASLv2) Apache Commons EL
+ The following NOTICE information applies:
+ Apache Commons EL
+ Copyright 1999-2016 The Apache Software Foundation
+
+ EL-8 patch - Copyright 2004-2007 Jamie Taylor
+ http://issues.apache.org/jira/browse/EL-8
+
+ (ASLv2) Apache HttpComponents
+ The following NOTICE information applies:
+ Apache HttpComponents Client
+ Copyright 1999-2016 The Apache Software Foundation
+ Apache HttpComponents Core - HttpCore
+ Copyright 2006-2009 The Apache Software Foundation
+
+ (ASLv2) Apache Commons Logging
+ The following NOTICE information applies:
+ Apache Commons Logging
+ Copyright 2003-2014 The Apache Software Foundation
+
+ (ASLv2) Apache Commons Pool
+ The following NOTICE information applies:
+ Apache Commons Pool
+ Copyright 1999-2009 The Apache Software Foundation.
+
+ (ASLv2) Apache Commons IO
+ The following NOTICE information applies:
+ Apache Commons IO
+ Copyright 2002-2016 The Apache Software Foundation
+
+ (ASLv2) Apache Hive
+ The following NOTICE information applies:
+ Apache Hive
+ Copyright 2008-2015 The Apache Software Foundation
+
+ This product includes software developed by The Apache Software
+ Foundation (http://www.apache.org/).
+
+ This product includes Jersey (https://jersey.java.net/)
+ Copyright (c) 2010-2014 Oracle and/or its affiliates.
+
+ This project includes software copyrighted by Microsoft Corporation and
+ licensed under the Apache License, Version 2.0.
+
+ This project includes software copyrighted by Dell SecureWorks and
+ licensed under the Apache License, Version 2.0.
+
+ (ASLv2) Jackson JSON processor
+ The following NOTICE information applies:
+ # Jackson JSON processor
+
+ Jackson is a high-performance, Free/Open Source JSON processing library.
+ It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
+ been in development since 2007.
+ It is currently developed by a community of developers, as well as supported
+ commercially by FasterXML.com.
+
+ ## Licensing
+
+ Jackson core and extension components may be licensed under different licenses.
+ To find the details that apply to this artifact see the accompanying LICENSE file.
+ For more information, including possible other licensing options, contact
+ FasterXML.com (http://fasterxml.com).
+
+ ## Credits
+
+ A list of contributors may be found from CREDITS file, which is included
+ in some artifacts (usually source distributions); but is always available
+ from the source code management (SCM) system project uses.
+
+ (ASLv2) BoneCP
+ The following NOTICE information applies:
+ BoneCP
+ Copyright 2010 Wallace Wadge
+
+ (ASLv2) Apache Hadoop
+ The following NOTICE information applies:
+ The binary distribution of this product bundles binaries of
+ org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the
+ following notices:
+ * Copyright 2011 Dain Sundstrom <da...@iq80.com>
+ * Copyright 2011 FuseSource Corp. http://fusesource.com
+
+ The binary distribution of this product bundles binaries of
+ org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni),
+ which has the following notices:
+ * This product includes software developed by FuseSource Corp.
+ http://fusesource.com
+ * This product includes software developed at
+ Progress Software Corporation and/or its subsidiaries or affiliates.
+ * This product includes software developed by IBM Corporation and others.
+
+ (ASLv2) Apache HBase
+ The following NOTICE information applies:
+ Apache HBase
+ Copyright 2007-2015 The Apache Software Foundation
+
+ --
+ This product incorporates portions of the 'Hadoop' project
+
+ Copyright 2007-2009 The Apache Software Foundation
+
+ Licensed under the Apache License v2.0
+ --
+ Our Orca logo we got here: http://www.vectorfree.com/jumping-orca
+ It is licensed Creative Commons Attribution 3.0.
+ See https://creativecommons.org/licenses/by/3.0/us/
+ We changed the logo by stripping the colored background, inverting
+ it and then rotating it some.
+
+ Later we found that vectorfree.com image is not properly licensed.
+ The original is owned by vectorportal.com. The original was
+ relicensed so we could use it as Creative Commons Attribution 3.0.
+ The license is bundled with the download available here:
+ http://www.vectorportal.com/subcategory/205/KILLER-WHALE-FREE-VECTOR.eps/ifile/9136/detailtest.asp
+ --
+ This product includes portions of the Bootstrap project v3.0.0
+
+ Copyright 2013 Twitter, Inc.
+
+ Licensed under the Apache License v2.0
+
+ This product uses the Glyphicons Halflings icon set.
+
+ http://glyphicons.com/
+
+ Copyright Jan Kovařík
+
+ Licensed under the Apache License v2.0 as a part of the Bootstrap project.
+
+ --
+ This product includes portions of the Guava project v14, specifically
+ 'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java'
+
+ Copyright (C) 2007 The Guava Authors
+
+ Licensed under the Apache License, Version 2.0
+
+ (ASLv2) Apache Commons Lang
+ The following NOTICE information applies:
+ Apache Commons Lang
+ Copyright 2001-2015 The Apache Software Foundation
+
+ (ASLv2) Apache Curator
+ The following NOTICE information applies:
+ Apache Curator
+ Copyright 2013-2014 The Apache Software Foundation
+
+ (ASLv2) Apache Derby
+ The following NOTICE information applies:
+ Apache Derby
+ Copyright 2004-2014 The Apache Software Foundation. Apache, Apache DB, Apache Derby, Apache Torque, Apache JDO, Apache DDLUtils,
+ the Derby hat logo, the Apache JDO logo, and the Apache feather logo are trademarks of The Apache Software Foundation.
+
+ (ASLv2) Apache DS
+ The following NOTICE information applies:
+ ApacheDS
+ Copyright 2003-2015 The Apache Software Foundation
+
+ (ASLv2) Apache Geronimo
+ The following NOTICE information applies:
+ Apache Geronimo
+ Copyright 2003-2008 The Apache Software Foundation
+
+ (ASLv2) HTrace Core
+ The following NOTICE information applies:
+ In addition, this product includes software dependencies. See
+ the accompanying LICENSE.txt for a listing of dependencies
+ that are NOT Apache licensed (with pointers to their licensing)
+
+ Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin
+ is a distributed tracing system that is Apache 2.0 Licensed.
+ Copyright 2012 Twitter, Inc.
+
+ (ASLv2) Jettison
+ The following NOTICE information applies:
+ Copyright 2006 Envoi Solutions LLC
+
+ (ASLv2) Jetty
+ The following NOTICE information applies:
+ Jetty Web Container
+ Copyright 1995-2017 Mort Bay Consulting Pty Ltd.
+
+ (ASLv2) Apache log4j
+ The following NOTICE information applies:
+ Apache log4j
+ Copyright 2007 The Apache Software Foundation
+
+ (ASLv2) Parquet MR
+ The following NOTICE information applies:
+ Parquet MR
+ Copyright 2012 Twitter, Inc.
+
+ This project includes code from https://github.com/lemire/JavaFastPFOR
+ parquet-column/src/main/java/parquet/column/values/bitpacking/LemireBitPacking.java
+ Apache License Version 2.0 http://www.apache.org/licenses/.
+ (c) Daniel Lemire, http://lemire.me/en/
+
+ (ASLv2) Apache Thrift
+ The following NOTICE information applies:
+ Apache Thrift
+ Copyright 2006-2010 The Apache Software Foundation.
+
+ (ASLv2) Apache Twill
+ The following NOTICE information applies:
+ Apache Twill
+ Copyright 2013-2016 The Apache Software Foundation
+
+ (ASLv2) Dropwizard Metrics
+ The following NOTICE information applies:
+ Metrics
+ Copyright 2010-2013 Coda Hale and Yammer, Inc.
+
+ This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64,
+ LongAdder), which was released with the following comments:
+
+ Written by Doug Lea with assistance from members of JCP JSR-166
+ Expert Group and released to the public domain, as explained at
+ http://creativecommons.org/publicdomain/zero/1.0/
+
+ (ASLv2) Joda Time
+ The following NOTICE information applies:
+ This product includes software developed by
+ Joda.org (http://www.joda.org/).
+
+ (ASLv2) The Netty Project
+ The following NOTICE information applies:
+ The Netty Project
+ Copyright 2011 The Netty Project
+
+ (ASLv2) Apache Tomcat
+ The following NOTICE information applies:
+ Apache Tomcat
+ Copyright 2007 The Apache Software Foundation
+
+ Java Management Extensions (JMX) support is provided by
+ the MX4J package, which is open source software. The
+ original software and related information is available
+ at http://mx4j.sourceforge.net.
+
+ Java compilation software for JSP pages is provided by Eclipse,
+ which is open source software. The original software and
+ related information is available at
+ http://www.eclipse.org.
+
+ (ASLv2) Apache ZooKeeper
+ The following NOTICE information applies:
+ Apache ZooKeeper
+ Copyright 2009-2012 The Apache Software Foundation
+
+ (ASLv2) Google GSON
+ The following NOTICE information applies:
+ Copyright 2008 Google Inc.
+
+ (ASLv2) JPam
+ The following NOTICE information applies:
+ Copyright 2003-2006 Greg Luck
+
+ ************************
+ Common Development and Distribution License 1.1
+ ************************
+
+ The following binary components are provided under the Common Development and Distribution License 1.1. See project link for details.
+
+ (CDDL 1.1) (GPL2 w/ CPE) jersey-client (com.sun.jersey:jersey-client:jar:1.9 - https://jersey.java.net)
+ (CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:jar:1.9 - https://jersey.java.net/)
+ (CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:jar:1.9 - https://jersey.java.net/)
+ (CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:jar:1.9 - https://jersey.java.net/)
+ (CDDL 1.1) (GPL2 w/ CPE) jersey-guice (com.sun.jersey.contribs:jersey-guice:jar:1.9 - https://jersey.java.net/)
+ (CDDL 1.1) (GPL2 w/ CPE) Java Architecture For XML Binding (javax.xml.bind:jaxb-api:jar:2.2.2 - https://jaxb.dev.java.net/)
+ (CDDL 1.1) (GPL2 w/ CPE) JavaMail API (compat) (javax.mail:mail:jar:1.4.7 - http://kenai.com/projects/javamail/mail)
+
+
+ ************************
+ Common Development and Distribution License 1.0
+ ************************
+
+ The following binary components are provided under the Common Development and Distribution License 1.0. See project link for details.
+
+ (CDDL 1.0) JavaServlet(TM) Specification (javax.servlet:servlet-api:jar:2.5 - no url available)
+ (CDDL 1.0) (GPL3) Streaming API For XML (javax.xml.stream:stax-api:jar:1.0-2 - no url provided)
+ (CDDL 1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:jar:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp)
+ (CDDL 1.0) JavaServer Pages(TM) API (javax.servlet.jsp:jsp-api:jar:2.1 - http://jsp.java.net)
+
+ *****************
+ Public Domain
+ *****************
+
+ The following binary components are provided to the 'Public Domain'. See project link for details.
+
+ (Public Domain) AOP Alliance 1.0 (http://aopalliance.sourceforge.net/)
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/pom.xml
new file mode 100644
index 0000000..c62268b
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/pom.xml
@@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hive-bundle</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>nifi-hive3-processors</artifactId>
+ <packaging>jar</packaging>
+
+ <properties>
+ <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
+ <hadoop.version>${hive3.hadoop.version}</hadoop.version>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-api</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-processor-utils</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-dbcp-service-api</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hive-services-api</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-record</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hadoop-record-utils</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-record-serialization-service-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-kerberos-credentials-service-api</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>${hive3.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-streaming</artifactId>
+ <version>${hive3.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive.hcatalog</groupId>
+ <artifactId>hive-hcatalog-core</artifactId>
+ <version>${hive3.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-hadoop-utils</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>com.github.stephenc.findbugs</groupId>
+ <artifactId>findbugs-annotations</artifactId>
+ <version>1.3.9-1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-mock-record-utils</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-mock</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/NiFiOrcUtils.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/NiFiOrcUtils.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/NiFiOrcUtils.java
new file mode 100644
index 0000000..7231421
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/NiFiOrcUtils.java
@@ -0,0 +1,533 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.avro.LogicalType;
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.util.Utf8;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.orc.MemoryManager;
+import org.apache.orc.OrcConf;
+import org.apache.orc.impl.MemoryManagerImpl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+
+/**
+ * Utility methods for ORC support (e.g. conversion from Avro, conversion to Hive types).
+ */
+public class NiFiOrcUtils {
+
+ public static Object convertToORCObject(TypeInfo typeInfo, Object o, final boolean hiveFieldNames) {
+ if (o != null) {
+ if (typeInfo instanceof UnionTypeInfo) {
+ OrcUnion union = new OrcUnion();
+ // Avro uses Utf8 and GenericData.EnumSymbol objects instead of Strings. This is handled in other places in the method, but here
+ // we need to determine the union types from the objects, so choose String.class if the object is one of those Avro classes
+ Class clazzToCompareTo = o.getClass();
+ if (o instanceof org.apache.avro.util.Utf8 || o instanceof GenericData.EnumSymbol) {
+ clazzToCompareTo = String.class;
+ }
+ // Need to find which of the union types correspond to the primitive object
+ TypeInfo objectTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(
+ ObjectInspectorFactory.getReflectionObjectInspector(clazzToCompareTo, ObjectInspectorFactory.ObjectInspectorOptions.JAVA));
+ List<TypeInfo> unionTypeInfos = ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos();
+
+ int index = 0;
+ while (index < unionTypeInfos.size() && !unionTypeInfos.get(index).equals(objectTypeInfo)) {
+ index++;
+ }
+ if (index < unionTypeInfos.size()) {
+ union.set((byte) index, convertToORCObject(objectTypeInfo, o, hiveFieldNames));
+ } else {
+ throw new IllegalArgumentException("Object Type for class " + o.getClass().getName() + " not in Union declaration");
+ }
+ return union;
+ }
+ if (o instanceof Integer) {
+ return new IntWritable((int) o);
+ }
+ if (o instanceof Boolean) {
+ return new BooleanWritable((boolean) o);
+ }
+ if (o instanceof Long) {
+ return new LongWritable((long) o);
+ }
+ if (o instanceof Float) {
+ return new FloatWritable((float) o);
+ }
+ if (o instanceof Double) {
+ return new DoubleWritable((double) o);
+ }
+ if (o instanceof String || o instanceof Utf8 || o instanceof GenericData.EnumSymbol) {
+ return new Text(o.toString());
+ }
+ if (o instanceof ByteBuffer) {
+ return new BytesWritable(((ByteBuffer) o).array());
+ }
+ if (o instanceof Timestamp) {
+ return new TimestampWritable((Timestamp) o);
+ }
+ if (o instanceof Date) {
+ return new DateWritable((Date) o);
+ }
+ if (o instanceof Object[]) {
+ Object[] objArray = (Object[]) o;
+ TypeInfo listTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
+ return Arrays.stream(objArray)
+ .map(o1 -> convertToORCObject(listTypeInfo, o1, hiveFieldNames))
+ .collect(Collectors.toList());
+ }
+ if (o instanceof int[]) {
+ int[] intArray = (int[]) o;
+ return Arrays.stream(intArray)
+ .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("int"), element, hiveFieldNames))
+ .collect(Collectors.toList());
+ }
+ if (o instanceof long[]) {
+ long[] longArray = (long[]) o;
+ return Arrays.stream(longArray)
+ .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("bigint"), element, hiveFieldNames))
+ .collect(Collectors.toList());
+ }
+ if (o instanceof float[]) {
+ float[] floatArray = (float[]) o;
+ return IntStream.range(0, floatArray.length)
+ .mapToDouble(i -> floatArray[i])
+ .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("float"), (float) element, hiveFieldNames))
+ .collect(Collectors.toList());
+ }
+ if (o instanceof double[]) {
+ double[] doubleArray = (double[]) o;
+ return Arrays.stream(doubleArray)
+ .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("double"), element, hiveFieldNames))
+ .collect(Collectors.toList());
+ }
+ if (o instanceof boolean[]) {
+ boolean[] booleanArray = (boolean[]) o;
+ return IntStream.range(0, booleanArray.length)
+ .map(i -> booleanArray[i] ? 1 : 0)
+ .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("boolean"), element == 1, hiveFieldNames))
+ .collect(Collectors.toList());
+ }
+ if (o instanceof GenericData.Array) {
+ GenericData.Array array = ((GenericData.Array) o);
+ // The type information in this case is interpreted as a List
+ TypeInfo listTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
+ return array.stream().map((element) -> convertToORCObject(listTypeInfo, element, hiveFieldNames)).collect(Collectors.toList());
+ }
+ if (o instanceof List) {
+ return o;
+ }
+ if (o instanceof Map) {
+ Map map = new HashMap();
+ TypeInfo keyInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
+ TypeInfo valueInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
+ // Unions are not allowed as key/value types, so if we convert the key and value objects,
+ // they should return Writable objects
+ ((Map) o).forEach((key, value) -> {
+ Object keyObject = convertToORCObject(keyInfo, key, hiveFieldNames);
+ Object valueObject = convertToORCObject(valueInfo, value, hiveFieldNames);
+ if (keyObject == null) {
+ throw new IllegalArgumentException("Maps' key cannot be null");
+ }
+ map.put(keyObject, valueObject);
+ });
+ return map;
+ }
+ if (o instanceof GenericData.Record) {
+ GenericData.Record record = (GenericData.Record) o;
+ TypeInfo recordSchema = NiFiOrcUtils.getOrcField(record.getSchema(), hiveFieldNames);
+ List<Schema.Field> recordFields = record.getSchema().getFields();
+ if (recordFields != null) {
+ Object[] fieldObjects = new Object[recordFields.size()];
+ for (int i = 0; i < recordFields.size(); i++) {
+ Schema.Field field = recordFields.get(i);
+ Schema fieldSchema = field.schema();
+ Object fieldObject = record.get(field.name());
+ fieldObjects[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema, hiveFieldNames), fieldObject, hiveFieldNames);
+ }
+ return NiFiOrcUtils.createOrcStruct(recordSchema, fieldObjects);
+ }
+ }
+ throw new IllegalArgumentException("Error converting object of type " + o.getClass().getName() + " to ORC type " + typeInfo.getTypeName());
+ } else {
+ return null;
+ }
+ }
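+
+ // Illustrative usage sketch (not part of this class): a TypeInfo can be parsed from a
+ // Hive type string, e.g. TypeInfoUtils.getTypeInfoFromTypeString("map<string,int>"),
+ // and passed with a matching Java object, e.g. convertToORCObject(typeInfo, javaMap, true),
+ // which returns a Map whose keys and values have been converted to Writable types.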
+
+
+ /**
+ * Create an object of OrcStruct given a TypeInfo and a list of objects
+ *
+ * @param typeInfo The TypeInfo object representing the ORC record schema
+ * @param objs ORC objects/Writables
+ * @return an OrcStruct containing the specified objects for the specified schema
+ */
+ @SuppressWarnings("unchecked")
+ public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
+ SettableStructObjectInspector oi = (SettableStructObjectInspector) OrcStruct
+ .createObjectInspector(typeInfo);
+ List<StructField> fields = (List<StructField>) oi.getAllStructFieldRefs();
+ OrcStruct result = (OrcStruct) oi.create();
+ result.setNumFields(fields.size());
+ for (int i = 0; i < fields.size(); i++) {
+ oi.setStructFieldData(result, fields.get(i), objs[i]);
+ }
+ return result;
+ }
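+
+ // Illustrative example (hypothetical values): createOrcStruct(
+ //     TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,age:int>"),
+ //     new Text("Alice"), new IntWritable(42))
+ // yields an OrcStruct whose two fields are set in declaration order.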
+
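+ // e.g. "my.table name" becomes "my_table_name"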
+ public static String normalizeHiveTableName(String name) {
+ return name.replaceAll("[\\. ]", "_");
+ }
+
+ public static String generateHiveDDL(Schema avroSchema, String tableName, boolean hiveFieldNames) {
+ Schema.Type schemaType = avroSchema.getType();
+ StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
+ sb.append(tableName);
+ sb.append(" (");
+ if (Schema.Type.RECORD.equals(schemaType)) {
+ List<String> hiveColumns = new ArrayList<>();
+ List<Schema.Field> fields = avroSchema.getFields();
+ if (fields != null) {
+ hiveColumns.addAll(
+ fields.stream().map(field -> (hiveFieldNames ? field.name().toLowerCase() : field.name()) + " "
+ + getHiveTypeFromAvroType(field.schema(), hiveFieldNames)).collect(Collectors.toList()));
+ }
+ sb.append(StringUtils.join(hiveColumns, ", "));
+ sb.append(") STORED AS ORC");
+ return sb.toString();
+ } else {
+ throw new IllegalArgumentException("Avro schema is of type " + schemaType.getName() + ", not RECORD");
+ }
+ }
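+
+ // Illustrative sketch (reviewer note, not part of this patch): for an Avro record with
+ // fields name:string and favorite_number:int and tableName "users", the method above
+ // produces:
+ // CREATE EXTERNAL TABLE IF NOT EXISTS users (name STRING, favorite_number INT) STORED AS ORC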
+
+
+ public static TypeInfo getOrcField(Schema fieldSchema, boolean hiveFieldNames) throws IllegalArgumentException {
+ Schema.Type fieldType = fieldSchema.getType();
+ LogicalType logicalType = fieldSchema.getLogicalType();
+
+ switch (fieldType) {
+ case INT:
+ case LONG:
+ // Handle logical types
+ if (logicalType != null) {
+ if (LogicalTypes.date().equals(logicalType)) {
+ return TypeInfoFactory.dateTypeInfo;
+ } else if (LogicalTypes.timeMicros().equals(logicalType)) {
+ // Time micros isn't supported by our Record Field types (see AvroTypeUtil)
+ throw new IllegalArgumentException("time-micros is not a supported field type");
+ } else if (LogicalTypes.timeMillis().equals(logicalType)) {
+ return TypeInfoFactory.intTypeInfo;
+ } else if (LogicalTypes.timestampMicros().equals(logicalType)) {
+ // Timestamp micros isn't supported by our Record Field types (see AvroTypeUtil)
+ throw new IllegalArgumentException("timestamp-micros is not a supported field type");
+ } else if (LogicalTypes.timestampMillis().equals(logicalType)) {
+ return TypeInfoFactory.timestampTypeInfo;
+ }
+ }
+ return getPrimitiveOrcTypeFromPrimitiveAvroType(fieldType);
+ case BYTES:
+ // Handle logical types
+ if (logicalType != null) {
+ if (logicalType instanceof LogicalTypes.Decimal) {
+ return TypeInfoFactory.doubleTypeInfo;
+ }
+ }
+ return getPrimitiveOrcTypeFromPrimitiveAvroType(fieldType);
+
+ case BOOLEAN:
+ case DOUBLE:
+ case FLOAT:
+ case STRING:
+ return getPrimitiveOrcTypeFromPrimitiveAvroType(fieldType);
+
+ case UNION:
+ List<Schema> unionFieldSchemas = fieldSchema.getTypes();
+
+ if (unionFieldSchemas != null) {
+ // Ignore null types in union
+ List<TypeInfo> orcFields = unionFieldSchemas.stream().filter(
+ unionFieldSchema -> !Schema.Type.NULL.equals(unionFieldSchema.getType()))
+ .map((it) -> NiFiOrcUtils.getOrcField(it, hiveFieldNames))
+ .collect(Collectors.toList());
+
+ // Flatten the field if the union only has one non-null element
+ if (orcFields.size() == 1) {
+ return orcFields.get(0);
+ } else {
+ return TypeInfoFactory.getUnionTypeInfo(orcFields);
+ }
+ }
+ return null;
+
+ case ARRAY:
+ return TypeInfoFactory.getListTypeInfo(getOrcField(fieldSchema.getElementType(), hiveFieldNames));
+
+ case MAP:
+ return TypeInfoFactory.getMapTypeInfo(
+ getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type.STRING),
+ getOrcField(fieldSchema.getValueType(), hiveFieldNames));
+
+ case RECORD:
+ List<Schema.Field> avroFields = fieldSchema.getFields();
+ if (avroFields != null) {
+ List<String> orcFieldNames = new ArrayList<>(avroFields.size());
+ List<TypeInfo> orcFields = new ArrayList<>(avroFields.size());
+ avroFields.forEach(avroField -> {
+ String fieldName = hiveFieldNames ? avroField.name().toLowerCase() : avroField.name();
+ orcFieldNames.add(fieldName);
+ orcFields.add(getOrcField(avroField.schema(), hiveFieldNames));
+ });
+ return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields);
+ }
+ return null;
+
+ case ENUM:
+ // An enum value is just a String for ORC/Hive
+ return getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type.STRING);
+
+ default:
+ throw new IllegalArgumentException("Did not recognize Avro type " + fieldType.getName());
+ }
+
+ }
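+
+ // Reviewer note (not part of this patch): the UNION branch above flattens nullable
+ // fields, so Avro ["null","string"] maps to plain string, while a true multi-type
+ // union such as ["int","string"] maps to uniontype<int,string>.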
+
+ public static Schema.Type getAvroSchemaTypeOfObject(Object o) {
+ if (o == null) {
+ return Schema.Type.NULL;
+ } else if (o instanceof Integer) {
+ return Schema.Type.INT;
+ } else if (o instanceof Long) {
+ return Schema.Type.LONG;
+ } else if (o instanceof Boolean) {
+ return Schema.Type.BOOLEAN;
+ } else if (o instanceof byte[]) {
+ return Schema.Type.BYTES;
+ } else if (o instanceof Float) {
+ return Schema.Type.FLOAT;
+ } else if (o instanceof Double) {
+ return Schema.Type.DOUBLE;
+ } else if (o instanceof Enum) {
+ return Schema.Type.ENUM;
+ } else if (o instanceof Object[]) {
+ return Schema.Type.ARRAY;
+ } else if (o instanceof List) {
+ return Schema.Type.ARRAY;
+ } else if (o instanceof Map) {
+ return Schema.Type.MAP;
+ } else {
+ throw new IllegalArgumentException("Object of class " + o.getClass() + " is not a supported Avro Type");
+ }
+ }
+
+ public static TypeInfo getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type avroType) throws IllegalArgumentException {
+ if (avroType == null) {
+ throw new IllegalArgumentException("Avro type is null");
+ }
+ switch (avroType) {
+ case INT:
+ return TypeInfoFactory.getPrimitiveTypeInfo("int");
+ case LONG:
+ return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
+ case BOOLEAN:
+ return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
+ case BYTES:
+ return TypeInfoFactory.getPrimitiveTypeInfo("binary");
+ case DOUBLE:
+ return TypeInfoFactory.getPrimitiveTypeInfo("double");
+ case FLOAT:
+ return TypeInfoFactory.getPrimitiveTypeInfo("float");
+ case STRING:
+ return TypeInfoFactory.getPrimitiveTypeInfo("string");
+ default:
+ throw new IllegalArgumentException("Avro type " + avroType.getName() + " is not a primitive type");
+ }
+ }
+
+ public static String getHiveTypeFromAvroType(Schema avroSchema, boolean hiveFieldNames) {
+ if (avroSchema == null) {
+ throw new IllegalArgumentException("Avro schema is null");
+ }
+
+ Schema.Type avroType = avroSchema.getType();
+ LogicalType logicalType = avroSchema.getLogicalType();
+
+ switch (avroType) {
+ case INT:
+ if (logicalType != null) {
+ if (LogicalTypes.date().equals(logicalType)) {
+ return "DATE";
+ }
+ // Time-millis has no current corresponding Hive type, perhaps an INTERVAL type when that is fully supported.
+ }
+ return "INT";
+ case LONG:
+ if (logicalType != null) {
+ if (LogicalTypes.timestampMillis().equals(logicalType)) {
+ return "TIMESTAMP";
+ }
+ // Timestamp-micros and time-micros are not supported by our Record Field type system
+ }
+ return "BIGINT";
+ case BOOLEAN:
+ return "BOOLEAN";
+ case BYTES:
+ if (logicalType != null) {
+ if (logicalType instanceof LogicalTypes.Decimal) {
+ return "DOUBLE";
+ }
+ }
+ return "BINARY";
+ case DOUBLE:
+ return "DOUBLE";
+ case FLOAT:
+ return "FLOAT";
+ case STRING:
+ case ENUM:
+ return "STRING";
+ case UNION:
+ List<Schema> unionFieldSchemas = avroSchema.getTypes();
+ if (unionFieldSchemas != null) {
+ List<String> hiveFields = new ArrayList<>();
+ for (Schema unionFieldSchema : unionFieldSchemas) {
+ Schema.Type unionFieldSchemaType = unionFieldSchema.getType();
+ // Ignore null types in union
+ if (!Schema.Type.NULL.equals(unionFieldSchemaType)) {
+ hiveFields.add(getHiveTypeFromAvroType(unionFieldSchema, hiveFieldNames));
+ }
+ }
+ // Flatten the field if the union only has one non-null element
+ return (hiveFields.size() == 1)
+ ? hiveFields.get(0)
+ : "UNIONTYPE<" + StringUtils.join(hiveFields, ", ") + ">";
+
+ }
+ break;
+ case MAP:
+ return "MAP<STRING, " + getHiveTypeFromAvroType(avroSchema.getValueType(), hiveFieldNames) + ">";
+ case ARRAY:
+ return "ARRAY<" + getHiveTypeFromAvroType(avroSchema.getElementType(), hiveFieldNames) + ">";
+ case RECORD:
+ List<Schema.Field> recordFields = avroSchema.getFields();
+ if (recordFields != null) {
+ List<String> hiveFields = recordFields.stream().map(
+ recordField -> (hiveFieldNames ? recordField.name().toLowerCase() : recordField.name()) + ":"
+ + getHiveTypeFromAvroType(recordField.schema(), hiveFieldNames)).collect(Collectors.toList());
+ return "STRUCT<" + StringUtils.join(hiveFields, ", ") + ">";
+ }
+ break;
+ default:
+ break;
+ }
+
+ throw new IllegalArgumentException("Error converting Avro type " + avroType.getName() + " to Hive type");
+ }
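+
+ // Reviewer note (not part of this patch): for a nested Avro record with fields
+ // id:long and tags:array<string>, the method above renders
+ // STRUCT<id:BIGINT, tags:ARRAY<STRING>>.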
+
+
+ public static Writer createWriter(
+ Path path,
+ Configuration conf,
+ TypeInfo orcSchema,
+ long stripeSize,
+ CompressionKind compress,
+ int bufferSize) throws IOException {
+
+ int rowIndexStride = (int) OrcConf.ROW_INDEX_STRIDE.getLong(conf);
+
+ boolean addBlockPadding = OrcConf.BLOCK_PADDING.getBoolean(conf);
+
+ String versionName = OrcConf.WRITE_FORMAT.getString(conf);
+ OrcFile.Version versionValue = (versionName == null)
+ ? OrcFile.Version.CURRENT
+ : OrcFile.Version.byName(versionName);
+
+ OrcFile.EncodingStrategy encodingStrategy;
+ String enString = OrcConf.ENCODING_STRATEGY.getString(conf);
+ if (enString == null) {
+ encodingStrategy = OrcFile.EncodingStrategy.SPEED;
+ } else {
+ encodingStrategy = OrcFile.EncodingStrategy.valueOf(enString);
+ }
+
+ final double paddingTolerance = OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(conf);
+
+ long blockSizeValue = OrcConf.BLOCK_SIZE.getLong(conf);
+
+ double bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(conf);
+
+ ObjectInspector inspector = OrcStruct.createObjectInspector(orcSchema);
+
+ OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
+ .rowIndexStride(rowIndexStride)
+ .blockPadding(addBlockPadding)
+ .version(versionValue)
+ .encodingStrategy(encodingStrategy)
+ .paddingTolerance(paddingTolerance)
+ .blockSize(blockSizeValue)
+ .bloomFilterFpp(bloomFilterFpp)
+ .memory(getMemoryManager(conf))
+ .inspector(inspector)
+ .stripeSize(stripeSize)
+ .bufferSize(bufferSize)
+ .compress(compress);
+
+ return OrcFile.createWriter(path, writerOptions);
+ }
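+
+ // Illustrative usage sketch (reviewer note, not part of this patch); the stripe and
+ // buffer sizes are arbitrary example values, everything else falls back to OrcConf:
+ // Writer writer = createWriter(new Path("/tmp/example.orc"), new Configuration(),
+ // TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string>"),
+ // 64L * 1024 * 1024, CompressionKind.ZLIB, 10 * 1024);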
+
+ private static MemoryManager memoryManager = null;
+
+ private static synchronized MemoryManager getMemoryManager(Configuration conf) {
+ if (memoryManager == null) {
+ memoryManager = new MemoryManagerImpl(conf);
+ }
+ return memoryManager;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hive/streaming/HiveRecordWriter.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hive/streaming/HiveRecordWriter.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hive/streaming/HiveRecordWriter.java
new file mode 100644
index 0000000..6edb374
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hive/streaming/HiveRecordWriter.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.streaming;
+
+import com.google.common.base.Joiner;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.io.ObjectWritable;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.record.Record;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.Properties;
+
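+/**
+ * A Hive Streaming RecordWriter that pulls NiFi Records from the supplied RecordReader
+ * and writes them to the target table via a NiFiRecordSerDe. Byte-array based encoding
+ * and writing are unsupported; records are consumed from the reader's InputStream.
+ */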
+public class HiveRecordWriter extends AbstractRecordWriter {
+
+ private RecordReader recordReader;
+ private NiFiRecordSerDe serde;
+ private ComponentLog log;
+
+ public HiveRecordWriter(RecordReader recordReader, ComponentLog log) {
+ super(null);
+ this.recordReader = recordReader;
+ this.log = log;
+ }
+
+ @Override
+ public AbstractSerDe createSerde() throws SerializationError {
+ try {
+ Properties tableProps = table.getMetadata();
+ tableProps.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(",").join(inputColumns));
+ tableProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(":").join(inputTypes));
+ NiFiRecordSerDe serde = new NiFiRecordSerDe(recordReader, log);
+ SerDeUtils.initializeSerDe(serde, conf, tableProps, null);
+ this.serde = serde;
+ return serde;
+ } catch (SerDeException e) {
+ throw new SerializationError("Error initializing serde " + NiFiRecordSerDe.class.getName(), e);
+ }
+ }
+
+ @Override
+ public Object encode(byte[] bytes) {
+ throw new UnsupportedOperationException(this.getClass().getName() + " does not support encoding of records via bytes, only via an InputStream");
+ }
+
+ @Override
+ public void write(long writeId, byte[] record) {
+ throw new UnsupportedOperationException(this.getClass().getName() + " does not support writing of records via bytes, only via an InputStream");
+ }
+
+ @Override
+ public void write(long writeId, InputStream inputStream) throws StreamingException {
+ // The inputStream is already available to the recordReader, so just iterate through the records
+ try {
+ Record record;
+ while ((record = recordReader.nextRecord()) != null) {
+ write(writeId, record);
+ }
+ } catch (MalformedRecordException | IOException e) {
+ throw new StreamingException(e.getLocalizedMessage(), e);
+ }
+ }
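+
+ // Illustrative call sequence (reviewer note, not part of this patch): a StreamingConnection
+ // built with this writer reaches the method above roughly as follows:
+ // connection.beginTransaction(); // initializes this writer
+ // connection.write(flowFileInputStream); // delegates here, record by record
+ // connection.commitTransaction();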
+
+ public Object encode(Record record) throws SerializationError {
+ try {
+ ObjectWritable blob = new ObjectWritable(record);
+ return serde.deserialize(blob);
+ } catch (SerDeException e) {
+ throw new SerializationError("Unable to convert Record into Object", e);
+ }
+ }
+
+ private void write(long writeId, Record record) throws StreamingException {
+ checkAutoFlush();
+ try {
+ Object encodedRow = encode(record);
+ int bucket = getBucket(encodedRow);
+ List<String> partitionValues = getPartitionValues(encodedRow);
+ getRecordUpdater(partitionValues, bucket).insert(writeId, encodedRow);
+ conn.getConnectionStats().incrementRecordsWritten();
+ } catch (IOException e) {
+ throw new StreamingIOFailure("Error writing record in transaction write id (" + writeId + ")", e);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hive/streaming/NiFiRecordSerDe.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hive/streaming/NiFiRecordSerDe.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hive/streaming/NiFiRecordSerDe.java
new file mode 100644
index 0000000..d4b444a
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/main/java/org/apache/hive/streaming/NiFiRecordSerDe.java
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.streaming;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.ObjectWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hive.common.util.HiveStringUtils;
+import org.apache.hive.common.util.TimestampParser;
+import org.apache.nifi.avro.AvroTypeUtil;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.serialization.record.RecordField;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.serialization.record.util.DataTypeUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
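+/**
+ * A Hive SerDe that deserializes NiFi Records (wrapped in an ObjectWritable) into rows
+ * laid out according to the target table's columns. Serialization is not supported.
+ */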
+public class NiFiRecordSerDe extends AbstractSerDe {
+
+ protected RecordReader recordReader;
+ protected ComponentLog log;
+ protected List<String> columnNames;
+ protected StructTypeInfo schema;
+ protected SerDeStats stats;
+
+ protected StandardStructObjectInspector cachedObjectInspector;
+ protected TimestampParser tsParser;
+
+ private static final Pattern INTERNAL_PATTERN = Pattern.compile("_col([0-9]+)");
+
+ private Map<String, Integer> fieldPositionMap;
+
+ public NiFiRecordSerDe(RecordReader recordReader, ComponentLog log) {
+ this.recordReader = recordReader;
+ this.log = log;
+ }
+
+ @Override
+ public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+ List<TypeInfo> columnTypes;
+ StructTypeInfo rowTypeInfo;
+
+ log.debug("Initializing NiFiRecordSerDe: {}", tbl.entrySet().toArray());
+
+ // Get column names and types
+ String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
+ String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
+ final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl
+ .getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
+ // all table column names
+ if (columnNameProperty.isEmpty()) {
+ columnNames = new ArrayList<>(0);
+ } else {
+ columnNames = new ArrayList<>(Arrays.asList(columnNameProperty.split(columnNameDelimiter)));
+ }
+
+ // all column types
+ if (columnTypeProperty.isEmpty()) {
+ columnTypes = new ArrayList<>(0);
+ } else {
+ columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+
+ log.debug("columns: {}, {}", new Object[]{columnNameProperty, columnNames});
+ log.debug("types: {}, {} ", new Object[]{columnTypeProperty, columnTypes});
+
+ assert (columnNames.size() == columnTypes.size());
+
+ rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+ schema = rowTypeInfo;
+ log.debug("schema : {}", new Object[]{schema});
+ cachedObjectInspector = (StandardStructObjectInspector) TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
+ tsParser = new TimestampParser(HiveStringUtils.splitAndUnEscape(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS)));
+ // Populate mapping of field names to column positions
+ try {
+ populateFieldPositionMap();
+ } catch (MalformedRecordException | IOException e) {
+ throw new SerDeException(e);
+ }
+ stats = new SerDeStats();
+ }
+
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return ObjectWritable.class;
+ }
+
+ @Override
+ public Writable serialize(Object o, ObjectInspector objectInspector) throws SerDeException {
+ throw new UnsupportedOperationException("This SerDe only supports deserialization");
+ }
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ return stats;
+ }
+
+ @Override
+ public Object deserialize(Writable writable) throws SerDeException {
+ ObjectWritable t = (ObjectWritable) writable;
+ Record record = (Record) t.get();
+ List<Object> r = new ArrayList<>(Collections.nCopies(columnNames.size(), null));
+ try {
+ RecordSchema recordSchema = record.getSchema();
+ for (RecordField field : recordSchema.getFields()) {
+ String fieldName = field.getFieldName();
+ String normalizedFieldName = fieldName.toLowerCase();
+
+ // Get column position of field name, and set field value there
+ Integer fpos = fieldPositionMap.get(normalizedFieldName);
+ if (fpos == null || fpos == -1) {
+ // This is either a partition column or not a column in the target table, ignore either way
+ continue;
+ }
+ Object currField = extractCurrentField(record, field, schema.getStructFieldTypeInfo(normalizedFieldName));
+ r.set(fpos, currField);
+ }
+ stats.setRowCount(stats.getRowCount() + 1);
+
+ } catch (Exception e) {
+ log.warn("Error [{}] parsing Record [{}].", new Object[]{e.getLocalizedMessage(), t}, e);
+ throw new SerDeException(e);
+ }
+
+ return r;
+ }
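+
+ // Illustrative sketch (reviewer note, not part of this patch): for a table with columns
+ // (name string, age int) and a Record {name="Alice", age=42, city="..."}, the method
+ // above returns the row ["Alice", 42]; the "city" field has no column and is skipped.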
+
+ /**
+ * Utility method to extract the value of the given field from the Record,
+ * converting it to the Java object expected for the field's Hive TypeInfo
+ * (e.g. numeric widening, date/timestamp parsing, nested struct conversion).
+ */
+ private Object extractCurrentField(Record record, RecordField field, TypeInfo fieldTypeInfo) {
+ Object val;
+ String fieldName = (field != null) ? field.getFieldName() : null;
+
+ switch (fieldTypeInfo.getCategory()) {
+ case PRIMITIVE:
+ PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = PrimitiveObjectInspector.PrimitiveCategory.UNKNOWN;
+ if (fieldTypeInfo instanceof PrimitiveTypeInfo) {
+ primitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
+ }
+ switch (primitiveCategory) {
+ case INT:
+ case BYTE:
+ case SHORT:
+ val = record.getAsInt(fieldName);
+ break;
+ case LONG:
+ val = record.getAsLong(fieldName);
+ break;
+ case BOOLEAN:
+ val = record.getAsBoolean(fieldName);
+ break;
+ case FLOAT:
+ val = record.getAsFloat(fieldName);
+ break;
+ case DOUBLE:
+ val = record.getAsDouble(fieldName);
+ break;
+ case STRING:
+ case VARCHAR:
+ case CHAR:
+ val = record.getAsString(fieldName);
+ break;
+ case BINARY:
+ val = AvroTypeUtil.convertByteArray(record.getAsArray(fieldName)).array();
+ break;
+ case DATE:
+ val = record.getAsDate(fieldName, field.getDataType().getFormat());
+ break;
+ case TIMESTAMP:
+ val = DataTypeUtils.toTimestamp(record.getValue(fieldName), () -> DataTypeUtils.getDateFormat(field.getDataType().getFormat()), fieldName);
+ break;
+ case DECIMAL:
+ val = record.getAsDouble(fieldName);
+ break;
+ default:
+ throw new IllegalArgumentException("Field " + fieldName + " cannot be converted to unknown type: " + primitiveCategory.name());
+ }
+ break;
+ case LIST:
+ val = Arrays.asList(record.getAsArray(fieldName));
+ break;
+ case MAP:
+ val = DataTypeUtils.convertRecordFieldtoObject(record.getValue(fieldName), field.getDataType());
+ break;
+ case STRUCT:
+ val = DataTypeUtils.convertRecordFieldtoObject(record.getValue(fieldName), field.getDataType());
+ break;
+ default:
+ log.error("Unknown type found: " + fieldTypeInfo + "for field of type: " + field.getDataType().toString());
+ return null;
+ }
+ return val;
+ }
+
+ @Override
+ public ObjectInspector getObjectInspector() {
+ return cachedObjectInspector;
+ }
+
+ private void populateFieldPositionMap() throws MalformedRecordException, IOException {
+ // Populate the mapping of field names to column positions only once
+ fieldPositionMap = new HashMap<>(columnNames.size());
+
+ RecordSchema recordSchema = recordReader.getSchema();
+ for (RecordField field : recordSchema.getFields()) {
+ String fieldName = field.getFieldName();
+ String normalizedFieldName = fieldName.toLowerCase();
+
+ int fpos = schema.getAllStructFieldNames().indexOf(normalizedFieldName);
+ if (fpos == -1) {
+ Matcher m = INTERNAL_PATTERN.matcher(fieldName);
+ fpos = m.matches() ? Integer.parseInt(m.group(1)) : -1;
+
+ log.debug("NPE finding position for field [{}] in schema [{}],"
+ + " attempting to check if it is an internal column name like _col0", new Object[]{fieldName, schema});
+ if (fpos == -1) {
+ // Unknown field: skip it and continue with the next one. Log at debug level because partition columns will appear as "unknown" fields
+ log.debug("Field {} is not found in the target table, ignoring...", new Object[]{field.getFieldName()});
+ continue;
+ }
+ // If we get past this, then the column name did match the hive pattern for an internal
+ // column name, such as _col0, etc, so it *MUST* match the schema for the appropriate column.
+ // This means people can't use arbitrary column names such as _col0, and expect us to ignore it
+ // if we find it.
+ if (!fieldName.equalsIgnoreCase(HiveConf.getColumnInternalName(fpos))) {
+ log.error("Hive internal column name {} and position "
+ + "encoding {} for the column name are at odds", new Object[]{fieldName, fpos});
+ throw new IOException("Hive internal column name (" + fieldName
+ + ") and position encoding (" + fpos
+ + ") for the column name are at odds");
+ }
+ // If we reached here, then we were successful at finding an alternate internal
+ // column mapping, and we're about to proceed.
+ }
+ fieldPositionMap.put(normalizedFieldName, fpos);
+ }
+ }
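+
+ // Illustrative sketch (reviewer note, not part of this patch): given table columns
+ // [name, age], a record field "Name" maps to position 0 (case-insensitive match),
+ // "_col1" maps to position 1 via the internal-name fallback, and a field like "city"
+ // is skipped entirely.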
+}
[2/6] nifi git commit: NIFI-4963: Added Hive3 bundle - Incorporated
review comments - Added more defensive code for PutHive3Streaming error
handling
Posted by bb...@apache.org.
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive3Streaming.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive3Streaming.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive3Streaming.java
new file mode 100644
index 0000000..6a65783
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive3Streaming.java
@@ -0,0 +1,878 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.hive;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hive.streaming.ConnectionStats;
+import org.apache.hive.streaming.HiveRecordWriter;
+import org.apache.hive.streaming.PartitionInfo;
+import org.apache.hive.streaming.RecordWriter;
+import org.apache.hive.streaming.StreamingConnection;
+import org.apache.hive.streaming.StreamingException;
+import org.apache.hive.streaming.StubConnectionError;
+import org.apache.hive.streaming.StubSerializationError;
+import org.apache.hive.streaming.StubStreamingIOFailure;
+import org.apache.hive.streaming.StubTransactionError;
+import org.apache.nifi.avro.AvroTypeUtil;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.controller.ControllerService;
+import org.apache.nifi.controller.ControllerServiceInitializationContext;
+import org.apache.nifi.hadoop.SecurityUtil;
+import org.apache.nifi.kerberos.KerberosCredentialsService;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.reporting.InitializationException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.record.MockRecordParser;
+import org.apache.nifi.serialization.record.RecordField;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.apache.nifi.util.hive.HiveConfigurator;
+import org.apache.nifi.util.hive.HiveOptions;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+
+import static org.apache.nifi.processors.hive.AbstractHive3QLProcessor.ATTR_OUTPUT_TABLES;
+import static org.apache.nifi.processors.hive.PutHive3Streaming.HIVE_STREAMING_RECORD_COUNT_ATTR;
+import static org.apache.nifi.processors.hive.PutHive3Streaming.KERBEROS_CREDENTIALS_SERVICE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for PutHive3Streaming processor.
+ */
+public class TestPutHive3Streaming {
+
+ private static final String TEST_CONF_PATH = "src/test/resources/core-site.xml";
+ private static final String TARGET_HIVE = "target/hive";
+
+ private TestRunner runner;
+ private MockPutHive3Streaming processor;
+
+ private HiveConfigurator hiveConfigurator;
+ private HiveConf hiveConf;
+ private UserGroupInformation ugi;
+ private Schema schema;
+
+ @Before
+ public void setUp() throws Exception {
+
+ final String avroSchema = IOUtils.toString(new FileInputStream("src/test/resources/user.avsc"), StandardCharsets.UTF_8);
+ schema = new Schema.Parser().parse(avroSchema);
+
+ Configuration testConf = new Configuration();
+ testConf.addResource(new Path(TEST_CONF_PATH));
+
+ // needed for calls to UserGroupInformation.setConfiguration() to work when passing in
+ // config with Kerberos authentication enabled
+ System.setProperty("java.security.krb5.realm", "nifi.com");
+ System.setProperty("java.security.krb5.kdc", "nifi.kdc");
+
+ ugi = null;
+ processor = new MockPutHive3Streaming();
+ hiveConfigurator = mock(HiveConfigurator.class);
+ hiveConf = new HiveConf();
+ when(hiveConfigurator.getConfigurationFromFiles(anyString())).thenReturn(hiveConf);
+ processor.hiveConfigurator = hiveConfigurator;
+
+ // Delete any temp files from previous tests
+ try {
+ FileUtils.deleteDirectory(new File(TARGET_HIVE));
+ } catch (IOException ioe) {
+ // Do nothing, directory may not have existed
+ }
+ }
+
+ private void configure(final PutHive3Streaming processor, final int numUsers) throws InitializationException {
+ configure(processor, numUsers, -1);
+ }
+
+ private void configure(final PutHive3Streaming processor, final int numUsers, int failAfter) throws InitializationException {
+ configure(processor, numUsers, failAfter, null);
+ }
+
+ private void configure(final PutHive3Streaming processor, final int numUsers, final int failAfter,
+ final BiFunction<Integer, MockRecordParser, Void> recordGenerator) throws InitializationException {
+ runner = TestRunners.newTestRunner(processor);
+ runner.setProperty(PutHive3Streaming.HIVE_CONFIGURATION_RESOURCES, TEST_CONF_PATH);
+ MockRecordParser readerFactory = new MockRecordParser();
+ final RecordSchema recordSchema = AvroTypeUtil.createSchema(schema);
+ for (final RecordField recordField : recordSchema.getFields()) {
+ readerFactory.addSchemaField(recordField.getFieldName(), recordField.getDataType().getFieldType(), recordField.isNullable());
+ }
+
+ if (recordGenerator == null) {
+ for (int i = 0; i < numUsers; i++) {
+ readerFactory.addRecord("name" + i, i, "blue" + i, i * 10.0);
+ }
+ } else {
+ recordGenerator.apply(numUsers, readerFactory);
+ }
+
+ readerFactory.failAfter(failAfter);
+
+ runner.addControllerService("mock-reader-factory", readerFactory);
+ runner.enableControllerService(readerFactory);
+
+ runner.setProperty(PutHive3Streaming.RECORD_READER, "mock-reader-factory");
+ }
+
+ private void configureComplex(final MockPutHive3Streaming processor, final int numUsers, final int failAfter,
+ final BiFunction<Integer, MockRecordParser, Void> recordGenerator) throws IOException, InitializationException {
+ final String avroSchema = IOUtils.toString(new FileInputStream("src/test/resources/array_of_records.avsc"), StandardCharsets.UTF_8);
+ schema = new Schema.Parser().parse(avroSchema);
+ processor.setFields(Arrays.asList(new FieldSchema("records",
+ serdeConstants.LIST_TYPE_NAME + "<"
+ + serdeConstants.MAP_TYPE_NAME + "<"
+ + serdeConstants.STRING_TYPE_NAME + ","
+ + serdeConstants.STRING_TYPE_NAME + ">>", "")));
+ runner = TestRunners.newTestRunner(processor);
+ runner.setProperty(PutHive3Streaming.HIVE_CONFIGURATION_RESOURCES, TEST_CONF_PATH);
+ MockRecordParser readerFactory = new MockRecordParser();
+ final RecordSchema recordSchema = AvroTypeUtil.createSchema(schema);
+ for (final RecordField recordField : recordSchema.getFields()) {
+ readerFactory.addSchemaField(recordField.getFieldName(), recordField.getDataType().getFieldType(), recordField.isNullable());
+ }
+
+ if (recordGenerator == null) {
+ Object[] mapArray = new Object[numUsers];
+ for (int i = 0; i < numUsers; i++) {
+ final int x = i;
+ Map<String, Object> map = new HashMap<String, Object>() {{
+ put("name", "name" + x);
+ put("age", x * 5);
+ }};
+ mapArray[i] = map;
+ }
+ readerFactory.addRecord((Object)mapArray);
+ } else {
+ recordGenerator.apply(numUsers, readerFactory);
+ }
+
+ readerFactory.failAfter(failAfter);
+
+ runner.addControllerService("mock-reader-factory", readerFactory);
+ runner.enableControllerService(readerFactory);
+
+ runner.setProperty(PutHive3Streaming.RECORD_READER, "mock-reader-factory");
+ }
+
+ @Test
+ public void testSetup() throws Exception {
+ configure(processor, 0);
+ runner.assertNotValid();
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.assertNotValid();
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.assertValid();
+ runner.run();
+ }
+
+ @Test
+ public void testUgiGetsCleared() throws Exception {
+ configure(processor, 0);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ processor.ugi = mock(UserGroupInformation.class);
+ runner.run();
+ assertNull(processor.ugi);
+ }
+
+ @Test
+ public void testUgiGetsSetIfSecure() throws Exception {
+ configure(processor, 1);
+ hiveConf.set(SecurityUtil.HADOOP_SECURITY_AUTHENTICATION, SecurityUtil.KERBEROS);
+ KerberosCredentialsService kcs = new MockKerberosCredentialsService();
+ runner.addControllerService("kcs", kcs);
+ runner.setProperty(KERBEROS_CREDENTIALS_SERVICE, "kcs");
+ runner.enableControllerService(kcs);
+ ugi = mock(UserGroupInformation.class);
+ when(hiveConfigurator.authenticate(eq(hiveConf), anyString(), anyString())).thenReturn(ugi);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.enqueue(new byte[0]);
+ runner.run();
+ }
+
+ @Test(expected = AssertionError.class)
+ public void testSetupWithKerberosAuthFailed() throws Exception {
+ configure(processor, 0);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.setProperty(PutHive3Streaming.HIVE_CONFIGURATION_RESOURCES, "src/test/resources/core-site-security.xml, src/test/resources/hive-site-security.xml");
+
+ hiveConf.set(SecurityUtil.HADOOP_SECURITY_AUTHENTICATION, SecurityUtil.KERBEROS);
+ KerberosCredentialsService kcs = new MockKerberosCredentialsService(null, null);
+ runner.addControllerService("kcs", kcs);
+ runner.setProperty(KERBEROS_CREDENTIALS_SERVICE, "kcs");
+ runner.enableControllerService(kcs);
+ runner.assertNotValid();
+ runner.run();
+ }
+
+ @Test
+ public void onTrigger() throws Exception {
+ configure(processor, 1);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.enqueue(new byte[0]);
+ runner.run();
+
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 1);
+ final MockFlowFile flowFile = runner.getFlowFilesForRelationship(PutHive3Streaming.REL_SUCCESS).get(0);
+ assertEquals("1", flowFile.getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR));
+ assertEquals("default.users", flowFile.getAttribute(ATTR_OUTPUT_TABLES));
+ }
+
+ @Test
+ public void onTriggerComplex() throws Exception {
+ configureComplex(processor, 10, -1, null);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.enqueue(new byte[0]);
+ runner.run();
+
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 1);
+ final MockFlowFile flowFile = runner.getFlowFilesForRelationship(PutHive3Streaming.REL_SUCCESS).get(0);
+ // Schema is an array of size 10, so only one record is output
+ assertEquals("1", flowFile.getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR));
+ assertEquals("default.users", flowFile.getAttribute(ATTR_OUTPUT_TABLES));
+ }
+
+ @Test
+ public void onTriggerBadInput() throws Exception {
+ configure(processor, 1, 0);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.enqueue("I am not an Avro record".getBytes());
+ runner.run();
+
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 1);
+ }
+
+ @Test
+ public void onTriggerBadInputRollbackOnFailure() throws Exception {
+ configure(processor, 1, 0);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+
+ runner.setProperty(PutHive3Streaming.ROLLBACK_ON_FAILURE, "true");
+ runner.enqueue("I am not an Avro record".getBytes());
+ try {
+ runner.run();
+ fail("ProcessException should be thrown");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 0);
+ // Assert incoming FlowFile stays in input queue.
+ assertEquals(1, runner.getQueueSize().getObjectCount());
+ }
+
+
+ @Test
+ public void onTriggerMultipleRecordsSingleTransaction() throws Exception {
+ configure(processor, 3);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ Map<String, Object> user1 = new HashMap<String, Object>() {
+ {
+ put("name", "Joe");
+ put("favorite_number", 146);
+ }
+ };
+ Map<String, Object> user2 = new HashMap<String, Object>() {
+ {
+ put("name", "Mary");
+ put("favorite_number", 42);
+ }
+ };
+ Map<String, Object> user3 = new HashMap<String, Object>() {
+ {
+ put("name", "Matt");
+ put("favorite_number", 3);
+ }
+ };
+ final List<Map<String, Object>> users = Arrays.asList(user1, user2, user3);
+ runner.enqueue(createAvroRecord(users));
+ runner.run();
+
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 1);
+ MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(PutHive3Streaming.REL_SUCCESS).get(0);
+ assertOutputAvroRecords(users, resultFlowFile);
+ }
+
+ @Test
+ public void onTriggerMultipleRecordsFailInMiddle() throws Exception {
+ configure(processor, 4);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ processor.setGenerateWriteFailure(true);
+ runner.enqueue(new byte[0]);
+ runner.run();
+
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 1);
+ runner.assertTransferCount(PutHive3Streaming.REL_RETRY, 0);
+ }
+
+ @Test
+ public void onTriggerMultipleRecordsFailInMiddleRollbackOnFailure() throws Exception {
+ configure(processor, 3);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.setProperty(PutHive3Streaming.ROLLBACK_ON_FAILURE, "true");
+ processor.setGenerateWriteFailure(true);
+ runner.enqueue(new byte[0]);
+ try {
+ runner.run();
+ fail("ProcessException should be thrown, because any Hive Transaction is committed yet.");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_RETRY, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 0);
+ // Assert incoming FlowFile stays in input queue.
+ assertEquals(1, runner.getQueueSize().getObjectCount());
+ }
+
+ private void assertOutputAvroRecords(List<Map<String, Object>> expectedRecords, MockFlowFile resultFlowFile) throws IOException {
+ assertEquals(String.valueOf(expectedRecords.size()), resultFlowFile.getAttribute(PutHive3Streaming.HIVE_STREAMING_RECORD_COUNT_ATTR));
+
+ final DataFileStream<GenericRecord> reader = new DataFileStream<>(
+ new ByteArrayInputStream(resultFlowFile.toByteArray()),
+ new GenericDatumReader<>());
+
+ Schema schema = reader.getSchema();
+
+ // Verify that the schema is preserved
+ assertEquals(schema, new Schema.Parser().parse(new File("src/test/resources/user.avsc")));
+
+ GenericRecord record = null;
+ for (Map<String, Object> expectedRecord : expectedRecords) {
+ assertTrue(reader.hasNext());
+ record = reader.next(record);
+ final String name = record.get("name").toString();
+ final Integer favorite_number = (Integer) record.get("favorite_number");
+ assertNotNull(name);
+ assertNotNull(favorite_number);
+ assertNull(record.get("favorite_color"));
+ assertNull(record.get("scale"));
+
+ assertEquals(expectedRecord.get("name"), name);
+ assertEquals(expectedRecord.get("favorite_number"), favorite_number);
+ }
+ assertFalse(reader.hasNext());
+ }
+
+ @Test
+ public void onTriggerWithConnectFailure() throws Exception {
+ configure(processor, 1);
+ processor.setGenerateConnectFailure(true);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.enqueue(new byte[0]);
+ try {
+ runner.run();
+ fail("ProcessException should be thrown");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+ runner.assertTransferCount(PutHive3Streaming.REL_RETRY, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 0);
+ // Assert incoming FlowFile stays in input queue.
+ assertEquals(1, runner.getQueueSize().getObjectCount());
+ }
+
+ @Test
+ public void onTriggerWithConnectFailureRollbackOnFailure() throws Exception {
+ configure(processor, 1);
+ processor.setGenerateConnectFailure(true);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.setProperty(PutHive3Streaming.ROLLBACK_ON_FAILURE, "true");
+ runner.enqueue(new byte[0]);
+ try {
+ runner.run();
+ fail("ProcessException should be thrown");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+
+ runner.assertTransferCount(PutHive3Streaming.REL_RETRY, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 0);
+ // Assert incoming FlowFile stays in input queue.
+ assertEquals(1, runner.getQueueSize().getObjectCount());
+ }
+
+ @Test
+ public void onTriggerWithWriteFailure() throws Exception {
+ configure(processor, 2);
+ processor.setGenerateWriteFailure(true);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.enqueue(new byte[0]);
+ runner.run();
+
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 1);
+ final MockFlowFile flowFile = runner.getFlowFilesForRelationship(PutHive3Streaming.REL_FAILURE).get(0);
+ assertEquals("0", flowFile.getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR));
+ assertEquals("default.users", flowFile.getAttribute(ATTR_OUTPUT_TABLES));
+ }
+
+ @Test
+ public void onTriggerWithWriteFailureRollbackOnFailure() throws Exception {
+ configure(processor, 2);
+ processor.setGenerateWriteFailure(true);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.setProperty(PutHive3Streaming.ROLLBACK_ON_FAILURE, "true");
+ Map<String, Object> user1 = new HashMap<String, Object>() {
+ {
+ put("name", "Joe");
+ put("favorite_number", 146);
+ }
+ };
+ Map<String, Object> user2 = new HashMap<String, Object>() {
+ {
+ put("name", "Mary");
+ put("favorite_number", 42);
+ }
+ };
+ runner.enqueue(createAvroRecord(Arrays.asList(user1, user2)));
+ try {
+ runner.run();
+ fail("ProcessException should be thrown");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 0);
+ // Assert incoming FlowFile stays in input queue.
+ assertEquals(1, runner.getQueueSize().getObjectCount());
+ }
+
+ @Test
+ public void onTriggerWithSerializationError() throws Exception {
+ configure(processor, 1);
+ processor.setGenerateSerializationError(true);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ Map<String, Object> user1 = new HashMap<String, Object>() {
+ {
+ put("name", "Joe");
+ put("favorite_number", 146);
+ }
+ };
+ runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
+ runner.run();
+
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 1);
+ }
+
+ @Test
+ public void onTriggerWithSerializationErrorRollbackOnFailure() throws Exception {
+ configure(processor, 1);
+ processor.setGenerateSerializationError(true);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.setProperty(PutHive3Streaming.ROLLBACK_ON_FAILURE, "true");
+ Map<String, Object> user1 = new HashMap<String, Object>() {
+ {
+ put("name", "Joe");
+ put("favorite_number", 146);
+ }
+ };
+ runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
+ try {
+ runner.run();
+ fail("ProcessException should be thrown");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 0);
+ // Assert incoming FlowFile stays in input queue.
+ assertEquals(1, runner.getQueueSize().getObjectCount());
+ }
+
+ @Test
+ public void onTriggerWithCommitFailure() throws Exception {
+ configure(processor, 1);
+ processor.setGenerateCommitFailure(true);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.setProperty(PutHive3Streaming.ROLLBACK_ON_FAILURE, "false");
+ runner.enqueue(new byte[0]);
+ runner.run();
+
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_RETRY, 1);
+ }
+
+ @Test
+ public void onTriggerWithCommitFailureRollbackOnFailure() throws Exception {
+ configure(processor, 1);
+ processor.setGenerateCommitFailure(true);
+ runner.setProperty(PutHive3Streaming.METASTORE_URI, "thrift://localhost:9083");
+ runner.setProperty(PutHive3Streaming.DB_NAME, "default");
+ runner.setProperty(PutHive3Streaming.TABLE_NAME, "users");
+ runner.setProperty(PutHive3Streaming.ROLLBACK_ON_FAILURE, "true");
+ runner.enqueue(new byte[0]);
+ try {
+ runner.run();
+ fail("ProcessException should be thrown");
+ } catch (AssertionError e) {
+ assertTrue(e.getCause() instanceof ProcessException);
+ }
+
+ runner.assertTransferCount(PutHive3Streaming.REL_FAILURE, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_SUCCESS, 0);
+ runner.assertTransferCount(PutHive3Streaming.REL_RETRY, 0);
+ // Assert incoming FlowFile stays in input queue.
+ assertEquals(1, runner.getQueueSize().getObjectCount());
+ }
+
+ @Test
+ public void cleanup() {
+ processor.cleanup();
+ }
+
+ private byte[] createAvroRecord(List<Map<String, Object>> records) throws IOException {
+ final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
+
+ List<GenericRecord> users = new LinkedList<>();
+ for (Map<String, Object> record : records) {
+ final GenericRecord user = new GenericData.Record(schema);
+ user.put("name", record.get("name"));
+ user.put("favorite_number", record.get("favorite_number"));
+ user.put("favorite_color", record.get("favorite_color"));
+ users.add(user);
+ }
+ final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
+ dataFileWriter.create(schema, out);
+ for (final GenericRecord user : users) {
+ dataFileWriter.append(user);
+ }
+ }
+ return out.toByteArray();
+
+ }
+
+ private class MockPutHive3Streaming extends PutHive3Streaming {
+
+ private boolean generateConnectFailure = false;
+ private boolean generateWriteFailure = false;
+ private boolean generateSerializationError = false;
+ private boolean generateCommitFailure = false;
+ private List<FieldSchema> schema = Arrays.asList(
+ new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, ""),
+ new FieldSchema("favorite_number", serdeConstants.INT_TYPE_NAME, ""),
+ new FieldSchema("favorite_color", serdeConstants.STRING_TYPE_NAME, ""),
+ new FieldSchema("scale", serdeConstants.DOUBLE_TYPE_NAME, "")
+ );
+
+ @Override
+ StreamingConnection makeStreamingConnection(HiveOptions options, RecordReader reader) throws StreamingException {
+
+ if (generateConnectFailure) {
+ throw new StubConnectionError("Unit Test - Connection Error");
+ }
+
+ HiveRecordWriter hiveRecordWriter = new HiveRecordWriter(reader, getLogger());
+ MockHiveStreamingConnection hiveConnection = new MockHiveStreamingConnection(options, reader, hiveRecordWriter, schema);
+ hiveConnection.setGenerateWriteFailure(generateWriteFailure);
+ hiveConnection.setGenerateSerializationError(generateSerializationError);
+ hiveConnection.setGenerateCommitFailure(generateCommitFailure);
+ return hiveConnection;
+ }
+
+ void setGenerateConnectFailure(boolean generateConnectFailure) {
+ this.generateConnectFailure = generateConnectFailure;
+ }
+
+ void setGenerateWriteFailure(boolean generateWriteFailure) {
+ this.generateWriteFailure = generateWriteFailure;
+ }
+
+ void setGenerateSerializationError(boolean generateSerializationError) {
+ this.generateSerializationError = generateSerializationError;
+ }
+
+ void setGenerateCommitFailure(boolean generateCommitFailure) {
+ this.generateCommitFailure = generateCommitFailure;
+ }
+
+ void setFields(List<FieldSchema> schema) {
+ this.schema = schema;
+ }
+ }
+
+ private class MockHiveStreamingConnection implements StreamingConnection {
+
+ private boolean generateWriteFailure = false;
+ private boolean generateSerializationError = false;
+ private boolean generateCommitFailure = false;
+ private int writeAttemptCount = 0;
+ private ConnectionStats connectionStats;
+ private HiveOptions options;
+ private RecordWriter writer;
+ private HiveConf hiveConf;
+ private Table table;
+ private String metastoreURI;
+
+ MockHiveStreamingConnection(HiveOptions options, RecordReader reader, RecordWriter recordWriter, List<FieldSchema> schema) {
+ this.options = options;
+ metastoreURI = options.getMetaStoreURI();
+ this.writer = recordWriter;
+ this.hiveConf = this.options.getHiveConf();
+ connectionStats = new ConnectionStats();
+ this.table = new Table(Table.getEmptyTable(options.getDatabaseName(), options.getTableName()));
+ this.table.setFields(schema);
+ StorageDescriptor sd = this.table.getSd();
+ sd.setOutputFormat(OrcOutputFormat.class.getName());
+ sd.setLocation(TARGET_HIVE);
+ }
+
+ @Override
+ public HiveConf getHiveConf() {
+ return hiveConf;
+ }
+
+ @Override
+ public void beginTransaction() throws StreamingException {
+ writer.init(this, 0, 100);
+ }
+
+ @Override
+ public synchronized void write(byte[] record) throws StreamingException {
+ throw new UnsupportedOperationException(this.getClass().getName() + " does not support writing of records via bytes, only via an InputStream");
+ }
+
+ @Override
+ public void write(InputStream inputStream) throws StreamingException {
+ try {
+ if (generateWriteFailure) {
+ throw new StubStreamingIOFailure("Unit Test - Streaming IO Failure");
+ }
+ if (generateSerializationError) {
+ throw new StubSerializationError("Unit Test - Serialization error", new Exception());
+ }
+ this.writer.write(writeAttemptCount, inputStream);
+ } finally {
+ writeAttemptCount++;
+ }
+ }
+
+ @Override
+ public void commitTransaction() throws StreamingException {
+ if (generateCommitFailure) {
+ throw new StubTransactionError("Unit Test - Commit Failure");
+ }
+ connectionStats.incrementCommittedTransactions();
+ }
+
+ @Override
+ public void abortTransaction() throws StreamingException {
+ connectionStats.incrementAbortedTransactions();
+ }
+
+ @Override
+ public void close() {
+ // closing the connection shouldn't throw an exception
+ }
+
+ @Override
+ public ConnectionStats getConnectionStats() {
+ return connectionStats;
+ }
+
+ public void setGenerateWriteFailure(boolean generateWriteFailure) {
+ this.generateWriteFailure = generateWriteFailure;
+ }
+
+ public void setGenerateSerializationError(boolean generateSerializationError) {
+ this.generateSerializationError = generateSerializationError;
+ }
+
+ public void setGenerateCommitFailure(boolean generateCommitFailure) {
+ this.generateCommitFailure = generateCommitFailure;
+ }
+
+ @Override
+ public String getMetastoreUri() {
+ return metastoreURI;
+ }
+
+ @Override
+ public Table getTable() {
+ return table;
+ }
+
+ @Override
+ public List<String> getStaticPartitionValues() {
+ return null;
+ }
+
+ @Override
+ public boolean isPartitionedTable() {
+ return false;
+ }
+
+ @Override
+ public boolean isDynamicPartitioning() {
+ return false;
+ }
+
+ @Override
+ public String getAgentInfo() {
+ return null;
+ }
+
+ @Override
+ public PartitionInfo createPartitionIfNotExists(List<String> list) throws StreamingException {
+ return null;
+ }
+ }
+
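+ /**
+ * Minimal KerberosCredentialsService stub that returns a fixed keytab path
+ * and principal; no validation or property handling is performed.
+ */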
+ private static class MockKerberosCredentialsService implements KerberosCredentialsService, ControllerService {
+
+ private String keytab = "src/test/resources/fake.keytab";
+ private String principal = "test@REALM.COM";
+
+ public MockKerberosCredentialsService() {
+ }
+
+ public MockKerberosCredentialsService(String keytab, String principal) {
+ this.keytab = keytab;
+ this.principal = principal;
+ }
+
+ @Override
+ public String getKeytab() {
+ return keytab;
+ }
+
+ @Override
+ public String getPrincipal() {
+ return principal;
+ }
+
+ @Override
+ public void initialize(ControllerServiceInitializationContext context) throws InitializationException {
+
+ }
+
+ @Override
+ public Collection<ValidationResult> validate(ValidationContext context) {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public PropertyDescriptor getPropertyDescriptor(String name) {
+ return null;
+ }
+
+ @Override
+ public void onPropertyModified(PropertyDescriptor descriptor, String oldValue, String newValue) {
+
+ }
+
+ @Override
+ public List<PropertyDescriptor> getPropertyDescriptors() {
+ return null;
+ }
+
+ @Override
+ public String getIdentifier() {
+ return "kcs";
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive3QL.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive3QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive3QL.java
new file mode 100644
index 0000000..50e83ac
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive3QL.java
@@ -0,0 +1,539 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.hive;
+
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.nifi.controller.AbstractControllerService;
+import org.apache.nifi.dbcp.DBCPService;
+import org.apache.nifi.dbcp.hive.Hive3DBCPService;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.reporting.InitializationException;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.apache.nifi.util.hive.HiveJdbcCommon;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import static org.apache.nifi.processors.hive.SelectHive3QL.HIVEQL_OUTPUT_FORMAT;
+import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO;
+import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV;
+import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE;
+import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestSelectHive3QL {
+
+ private static final Logger LOGGER;
+ private final static String MAX_ROWS_KEY = "maxRows";
+
+ static {
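+ // Configure the SLF4J SimpleLogger: info by default, timestamps on, and
+ // debug output for NiFi NIO, the processor under test, and this test class.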
+ System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info");
+ System.setProperty("org.slf4j.simpleLogger.showDateTime", "true");
+ System.setProperty("org.slf4j.simpleLogger.log.nifi.io.nio", "debug");
+ System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.SelectHive3QL", "debug");
+ System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.TestSelectHive3QL", "debug");
+ LOGGER = LoggerFactory.getLogger(TestSelectHive3QL.class);
+ }
+
+ private final static String DB_LOCATION = "target/db";
+
+ private final static String QUERY_WITH_EL = "select "
+ + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
+ + " from persons PER"
+ + " where PER.ID > ${person.id}";
+
+ private final static String QUERY_WITHOUT_EL = "select "
+ + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
+ + " from persons PER"
+ + " where PER.ID > 10";
+
+
+ @BeforeClass
+ public static void setupClass() {
+ System.setProperty("derby.stream.error.file", "target/derby.log");
+ }
+
+ private TestRunner runner;
+
+ @Before
+ public void setup() throws InitializationException {
+ final DBCPService dbcp = new DBCPServiceSimpleImpl();
+ final Map<String, String> dbcpProperties = new HashMap<>();
+
+ runner = TestRunners.newTestRunner(SelectHive3QL.class);
+ runner.addControllerService("dbcp", dbcp, dbcpProperties);
+ runner.enableControllerService(dbcp);
+ runner.setProperty(SelectHive3QL.HIVE_DBCP_SERVICE, "dbcp");
+ }
+
+ @Test
+ public void testIncomingConnectionWithNoFlowFile() throws InitializationException {
+ runner.setIncomingConnection(true);
+ runner.setProperty(SelectHive3QL.HIVEQL_SELECT_QUERY, "SELECT * FROM persons");
+ runner.run();
+ runner.assertTransferCount(SelectHive3QL.REL_SUCCESS, 0);
+ runner.assertTransferCount(SelectHive3QL.REL_FAILURE, 0);
+ }
+
+ @Test
+ public void testNoIncomingConnection() throws ClassNotFoundException, SQLException, InitializationException, IOException {
+ runner.setIncomingConnection(false);
+ invokeOnTrigger(QUERY_WITHOUT_EL, false, AVRO);
+ }
+
+ @Test
+ public void testNoTimeLimit() throws InitializationException, ClassNotFoundException, SQLException, IOException {
+ invokeOnTrigger(QUERY_WITH_EL, true, AVRO);
+ }
+
+
+ @Test
+ public void testWithNullIntColumn() throws SQLException {
+ // remove previous test database, if any
+ final File dbLocation = new File(DB_LOCATION);
+ dbLocation.delete();
+
+ // load test data to database
+ final Connection con = ((Hive3DBCPService) runner.getControllerService("dbcp")).getConnection();
+ Statement stmt = con.createStatement();
+
+ try {
+ stmt.execute("drop table TEST_NULL_INT");
+ } catch (final SQLException sqle) {
+ // Nothing to do, probably means the table didn't exist
+ }
+
+ stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))");
+
+ stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)");
+ stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)");
+
+ runner.setIncomingConnection(false);
+ runner.setProperty(SelectHive3QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT");
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_SUCCESS, 1);
+ runner.getFlowFilesForRelationship(SelectHive3QL.REL_SUCCESS).get(0).assertAttributeEquals(SelectHive3QL.RESULT_ROW_COUNT, "2");
+ }
+
+ @Test
+ public void testWithSqlException() throws SQLException {
+ // remove previous test database, if any
+ final File dbLocation = new File(DB_LOCATION);
+ dbLocation.delete();
+
+ // load test data to database
+ final Connection con = ((Hive3DBCPService) runner.getControllerService("dbcp")).getConnection();
+ Statement stmt = con.createStatement();
+
+ try {
+ stmt.execute("drop table TEST_NO_ROWS");
+ } catch (final SQLException sqle) {
+ // Nothing to do, probably means the table didn't exist
+ }
+
+ stmt.execute("create table TEST_NO_ROWS (id integer)");
+
+ runner.setIncomingConnection(false);
+ // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.)
+ runner.setProperty(SelectHive3QL.HIVEQL_SELECT_QUERY, "SELECT val1 FROM TEST_NO_ROWS");
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_FAILURE, 1);
+ }
+
+ @Test
+ public void testWithBadSQL() throws SQLException {
+ final String BAD_SQL = "create table TEST_NO_ROWS (id integer)";
+
+ // Test with incoming flow file (it should be routed to failure intact, i.e. same content and no parent)
+ runner.setIncomingConnection(true);
+ // Enqueue a DDL statement; it is valid SQL but not a query, so executing it as a SELECT generates an error
+ runner.enqueue(BAD_SQL);
+ runner.run();
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_FAILURE, 1);
+ MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive3QL.REL_FAILURE).get(0);
+ flowFile.assertContentEquals(BAD_SQL);
+ flowFile.assertAttributeEquals("parentIds", null);
+ runner.clearTransferState();
+
+ // Test with no incoming flow file (an empty flow file is transferred)
+ runner.setIncomingConnection(false);
+ // Use the same DDL statement as the query property; it should again generate an error
+ runner.setProperty(SelectHive3QL.HIVEQL_SELECT_QUERY, BAD_SQL);
+ runner.run();
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_FAILURE, 1);
+ flowFile = runner.getFlowFilesForRelationship(SelectHive3QL.REL_FAILURE).get(0);
+ flowFile.assertContentEquals("");
+ }
+
+ @Test
+ public void invokeOnTriggerWithCsv()
+ throws InitializationException, ClassNotFoundException, SQLException, IOException {
+ invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV);
+ }
+
+ @Test
+ public void invokeOnTriggerWithAvro()
+ throws InitializationException, ClassNotFoundException, SQLException, IOException {
+ invokeOnTrigger(QUERY_WITHOUT_EL, false, AVRO);
+ }
+
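+ /**
+ * Shared harness: (re)creates a Derby-backed "persons" table with 100 rows
+ * (the last with a NULL code), runs SelectHive3QL with the given query and
+ * output format, and verifies the 90 matching rows in Avro or CSV form.
+ */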
+ public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat)
+ throws InitializationException, ClassNotFoundException, SQLException, IOException {
+
+ // remove previous test database, if any
+ final File dbLocation = new File(DB_LOCATION);
+ dbLocation.delete();
+
+ // load test data to database
+ final Connection con = ((Hive3DBCPService) runner.getControllerService("dbcp")).getConnection();
+ final Statement stmt = con.createStatement();
+ try {
+ stmt.execute("drop table persons");
+ } catch (final SQLException sqle) {
+ // Nothing to do here, the table didn't exist
+ }
+
+ stmt.execute("create table persons (id integer, name varchar(100), code integer)");
+ Random rng = new Random(53496);
+ final int nrOfRows = 100;
+ stmt.executeUpdate("insert into persons values (1, 'Joe Smith', " + rng.nextInt(469947) + ")");
+ for (int i = 2; i < nrOfRows; i++) {
+ stmt.executeUpdate("insert into persons values (" + i + ", 'Someone Else', " + rng.nextInt(469947) + ")");
+ }
+ stmt.executeUpdate("insert into persons values (" + nrOfRows + ", 'Last Person', NULL)");
+
+ LOGGER.info("test data loaded");
+
+ runner.setProperty(SelectHive3QL.HIVEQL_SELECT_QUERY, query);
+ runner.setProperty(HIVEQL_OUTPUT_FORMAT, outputFormat);
+
+ if (incomingFlowFile) {
+ // incoming FlowFile content is not used, but attributes are used
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.put("person.id", "10");
+ runner.enqueue("Hello".getBytes(), attributes);
+ }
+
+ runner.setIncomingConnection(incomingFlowFile);
+ runner.run();
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_SUCCESS, 1);
+
+ final List<MockFlowFile> flowfiles = runner.getFlowFilesForRelationship(SelectHive3QL.REL_SUCCESS);
+ MockFlowFile flowFile = flowfiles.get(0);
+ final InputStream in = new ByteArrayInputStream(flowFile.toByteArray());
+ long recordsFromStream = 0;
+ if (AVRO.equals(outputFormat)) {
+ assertEquals(MIME_TYPE_AVRO_BINARY, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
+ final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
+ try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
+ GenericRecord record = null;
+ while (dataFileReader.hasNext()) {
+ // Reuse record object by passing it to next(). This saves us from
+ // allocating and garbage collecting many objects for files with
+ // many items.
+ record = dataFileReader.next(record);
+ recordsFromStream++;
+ }
+ }
+ } else {
+ assertEquals(CSV_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
+ BufferedReader br = new BufferedReader(new InputStreamReader(in));
+
+ String headerRow = br.readLine();
+ // Derby capitalizes column names
+ assertEquals("PERSONID,PERSONNAME,PERSONCODE", headerRow);
+
+ // Validate rows
+ String line;
+ while ((line = br.readLine()) != null) {
+ recordsFromStream++;
+ String[] values = line.split(",");
+ if (recordsFromStream < (nrOfRows - 10)) {
+ assertEquals(3, values.length);
+ assertTrue(values[1].startsWith("\""));
+ assertTrue(values[1].endsWith("\""));
+ } else {
+ assertEquals(2, values.length); // Last value (code) is null, so split() drops the trailing empty field
+ }
+ }
+ }
+ assertEquals(nrOfRows - 10, recordsFromStream);
+ assertEquals(recordsFromStream, Integer.parseInt(flowFile.getAttribute(SelectHive3QL.RESULT_ROW_COUNT)));
+ flowFile.assertAttributeEquals(AbstractHive3QLProcessor.ATTR_INPUT_TABLES, "persons");
+ }
+
+ @Test
+ public void testMaxRowsPerFlowFileAvro() throws ClassNotFoundException, SQLException, InitializationException, IOException {
+
+ // load test data to database
+ final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
+ Statement stmt = con.createStatement();
+ InputStream in;
+ MockFlowFile mff;
+
+ try {
+ stmt.execute("drop table TEST_QUERY_DB_TABLE");
+ } catch (final SQLException sqle) {
+ // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
+ }
+
+ stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
+ int rowCount = 0;
+ // create a larger row set
+ for (int batch = 0; batch < 100; batch++) {
+ stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
+ rowCount++;
+ }
+
+ runner.setIncomingConnection(false);
+ runner.setProperty(SelectHive3QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE");
+ runner.setProperty(SelectHive3QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
+ runner.setProperty(SelectHive3QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO);
+ runner.setVariable(MAX_ROWS_KEY, "9");
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_SUCCESS, 12);
+
+ // ensure all but the last file have 9 records each
+ for (int ff = 0; ff < 11; ff++) {
+ mff = runner.getFlowFilesForRelationship(SelectHive3QL.REL_SUCCESS).get(ff);
+ in = new ByteArrayInputStream(mff.toByteArray());
+ assertEquals(9, getNumberOfRecordsFromStream(in));
+
+ mff.assertAttributeExists("fragment.identifier");
+ assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
+ assertEquals("12", mff.getAttribute("fragment.count"));
+ }
+
+ // last file should have 1 record
+ mff = runner.getFlowFilesForRelationship(SelectHive3QL.REL_SUCCESS).get(11);
+ in = new ByteArrayInputStream(mff.toByteArray());
+ assertEquals(1, getNumberOfRecordsFromStream(in));
+ mff.assertAttributeExists("fragment.identifier");
+ assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
+ assertEquals("12", mff.getAttribute("fragment.count"));
+ runner.clearTransferState();
+ }
+
+ @Test
+ public void testParametrizedQuery() throws ClassNotFoundException, SQLException, InitializationException, IOException {
+ // load test data to database
+ final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
+ Statement stmt = con.createStatement();
+
+ try {
+ stmt.execute("drop table TEST_QUERY_DB_TABLE");
+ } catch (final SQLException sqle) {
+ // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
+ }
+
+ stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
+ int rowCount = 0;
+ // create a larger row set
+ for (int batch = 0; batch < 100; batch++) {
+ stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
+ rowCount++;
+ }
+
+ runner.setIncomingConnection(true);
+ runner.setProperty(SelectHive3QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
+ runner.setProperty(SelectHive3QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO);
+ runner.setVariable(MAX_ROWS_KEY, "9");
+
+ Map<String, String> attributes = new HashMap<>();
+ attributes.put("hiveql.args.1.value", "1");
+ attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+ runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id = ?", attributes);
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_SUCCESS, 1);
+ runner.clearTransferState();
+ }
+
+ @Test
+ public void testMaxRowsPerFlowFileCSV() throws ClassNotFoundException, SQLException, InitializationException, IOException {
+
+ // load test data to database
+ final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
+ Statement stmt = con.createStatement();
+ InputStream in;
+ MockFlowFile mff;
+
+ try {
+ stmt.execute("drop table TEST_QUERY_DB_TABLE");
+ } catch (final SQLException sqle) {
+ // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
+ }
+
+ stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
+ int rowCount = 0;
+ // create a larger row set
+ for (int batch = 0; batch < 100; batch++) {
+ stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
+ rowCount++;
+ }
+
+ runner.setIncomingConnection(true);
+ runner.setProperty(SelectHive3QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
+ runner.setProperty(SelectHive3QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.CSV);
+
+ runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE", new HashMap<String, String>() {{
+ put(MAX_ROWS_KEY, "9");
+ }});
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_SUCCESS, 12);
+
+ // ensure all but the last file have 9 records each (10 lines = 9 records + header)
+ for (int ff = 0; ff < 11; ff++) {
+ mff = runner.getFlowFilesForRelationship(SelectHive3QL.REL_SUCCESS).get(ff);
+ in = new ByteArrayInputStream(mff.toByteArray());
+ BufferedReader br = new BufferedReader(new InputStreamReader(in));
+ assertEquals(10, br.lines().count());
+
+ mff.assertAttributeExists("fragment.identifier");
+ assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
+ assertEquals("12", mff.getAttribute("fragment.count"));
+ }
+
+ // last file should have 1 record (2 lines = 1 record + header)
+ mff = runner.getFlowFilesForRelationship(SelectHive3QL.REL_SUCCESS).get(11);
+ in = new ByteArrayInputStream(mff.toByteArray());
+ BufferedReader br = new BufferedReader(new InputStreamReader(in));
+ assertEquals(2, br.lines().count());
+ mff.assertAttributeExists("fragment.identifier");
+ assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
+ assertEquals("12", mff.getAttribute("fragment.count"));
+ runner.clearTransferState();
+ }
+
+ @Test
+ public void testMaxRowsPerFlowFileWithMaxFragments() throws ClassNotFoundException, SQLException, InitializationException, IOException {
+
+ // load test data to database
+ final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
+ Statement stmt = con.createStatement();
+ InputStream in;
+ MockFlowFile mff;
+
+ try {
+ stmt.execute("drop table TEST_QUERY_DB_TABLE");
+ } catch (final SQLException sqle) {
+ // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
+ }
+
+ stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
+ int rowCount = 0;
+ // create a larger row set
+ for (int batch = 0; batch < 100; batch++) {
+ stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
+ rowCount++;
+ }
+
+ runner.setIncomingConnection(false);
+ runner.setProperty(SelectHive3QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE");
+ runner.setProperty(SelectHive3QL.MAX_ROWS_PER_FLOW_FILE, "9");
+ Integer maxFragments = 3;
+ runner.setProperty(SelectHive3QL.MAX_FRAGMENTS, maxFragments.toString());
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(SelectHive3QL.REL_SUCCESS, maxFragments);
+
+ for (int i = 0; i < maxFragments; i++) {
+ mff = runner.getFlowFilesForRelationship(SelectHive3QL.REL_SUCCESS).get(i);
+ in = new ByteArrayInputStream(mff.toByteArray());
+ assertEquals(9, getNumberOfRecordsFromStream(in));
+
+ mff.assertAttributeExists("fragment.identifier");
+ assertEquals(Integer.toString(i), mff.getAttribute("fragment.index"));
+ assertEquals(maxFragments.toString(), mff.getAttribute("fragment.count"));
+ }
+
+ runner.clearTransferState();
+ }
+
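+ /**
+ * Counts the records in an Avro binary stream using a generic DataFileStream reader.
+ */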
+ private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
+ final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
+ try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
+ GenericRecord record = null;
+ long recordsFromStream = 0;
+ while (dataFileReader.hasNext()) {
+ // Reuse record object by passing it to next(). This saves us from
+ // allocating and garbage collecting many objects for files with
+ // many items.
+ record = dataFileReader.next(record);
+ recordsFromStream += 1;
+ }
+
+ return recordsFromStream;
+ }
+ }
+
+ /**
+ * Simple implementation only for SelectHive3QL processor testing.
+ */
+ private class DBCPServiceSimpleImpl extends AbstractControllerService implements Hive3DBCPService {
+
+ @Override
+ public String getIdentifier() {
+ return "dbcp";
+ }
+
+ @Override
+ public Connection getConnection() throws ProcessException {
+ try {
+ Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
+ return DriverManager.getConnection("jdbc:derby:" + DB_LOCATION + ";create=true");
+ } catch (final Exception e) {
+ throw new ProcessException("getConnection failed: " + e);
+ }
+ }
+
+ @Override
+ public String getConnectionURL() {
+ return "jdbc:derby:" + DB_LOCATION + ";create=true";
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/nifi/blob/da99f873/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/orc/PutORCTest.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/orc/PutORCTest.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/orc/PutORCTest.java
new file mode 100644
index 0000000..e1af5a1
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-processors/src/test/java/org/apache/nifi/processors/orc/PutORCTest.java
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.orc;
+
+import org.apache.avro.Schema;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile;
+import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
+import org.apache.hadoop.hive.ql.io.orc.Reader;
+import org.apache.hadoop.hive.ql.io.orc.RecordReader;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.log4j.BasicConfigurator;
+import org.apache.nifi.avro.AvroTypeUtil;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processors.hadoop.exception.FailureException;
+import org.apache.nifi.processors.hadoop.record.HDFSRecordWriter;
+import org.apache.nifi.provenance.ProvenanceEventRecord;
+import org.apache.nifi.provenance.ProvenanceEventType;
+import org.apache.nifi.reporting.InitializationException;
+import org.apache.nifi.schema.access.SchemaNotFoundException;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.record.MockRecordParser;
+import org.apache.nifi.serialization.record.RecordField;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.serialization.record.RecordSet;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.temporal.ChronoField;
+import java.util.Calendar;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.when;
+
+public class PutORCTest {
+
+ private static final String DIRECTORY = "target";
+ private static final String TEST_CONF_PATH = "src/test/resources/core-site.xml";
+
+ private Schema schema;
+ private Configuration testConf;
+ private PutORC proc;
+ private TestRunner testRunner;
+
+ @BeforeClass
+ public static void setupLogging() {
+ BasicConfigurator.configure();
+ }
+
+ @Before
+ public void setup() throws IOException {
+ final String avroSchema = IOUtils.toString(new FileInputStream("src/test/resources/user.avsc"), StandardCharsets.UTF_8);
+ schema = new Schema.Parser().parse(avroSchema);
+
+ testConf = new Configuration();
+ testConf.addResource(new Path(TEST_CONF_PATH));
+
+ proc = new PutORC();
+ }
+
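+ /**
+ * Wires up a TestRunner for the given PutORC instance with a MockRecordParser
+ * that either emits default user records or delegates to the supplied generator.
+ */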
+ private void configure(final PutORC putORC, final int numUsers) throws InitializationException {
+ configure(putORC, numUsers, null);
+ }
+
+ private void configure(final PutORC putORC, final int numUsers, final BiFunction<Integer, MockRecordParser, Void> recordGenerator) throws InitializationException {
+ testRunner = TestRunners.newTestRunner(putORC);
+ testRunner.setProperty(PutORC.HADOOP_CONFIGURATION_RESOURCES, TEST_CONF_PATH);
+ testRunner.setProperty(PutORC.DIRECTORY, DIRECTORY);
+
+ MockRecordParser readerFactory = new MockRecordParser();
+
+ final RecordSchema recordSchema = AvroTypeUtil.createSchema(schema);
+ for (final RecordField recordField : recordSchema.getFields()) {
+ readerFactory.addSchemaField(recordField.getFieldName(), recordField.getDataType().getFieldType(), recordField.isNullable());
+ }
+
+ if (recordGenerator == null) {
+ for (int i = 0; i < numUsers; i++) {
+ readerFactory.addRecord("name" + i, i, "blue" + i, i * 10.0);
+ }
+ } else {
+ recordGenerator.apply(numUsers, readerFactory);
+ }
+
+ testRunner.addControllerService("mock-reader-factory", readerFactory);
+ testRunner.enableControllerService(readerFactory);
+
+ testRunner.setProperty(PutORC.RECORD_READER, "mock-reader-factory");
+ }
+
+ @Test
+ public void testWriteORCWithDefaults() throws IOException, InitializationException {
+ configure(proc, 100);
+
+ final String filename = "testORCWithDefaults-" + System.currentTimeMillis();
+
+ final Map<String, String> flowFileAttributes = new HashMap<>();
+ flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
+
+ testRunner.setProperty(PutORC.HIVE_TABLE_NAME, "myTable");
+
+ testRunner.enqueue("trigger", flowFileAttributes);
+ testRunner.run();
+ testRunner.assertAllFlowFilesTransferred(PutORC.REL_SUCCESS, 1);
+
+ final Path orcFile = new Path(DIRECTORY + "/" + filename);
+
+ // verify the successful flow file has the expected attributes
+ final MockFlowFile mockFlowFile = testRunner.getFlowFilesForRelationship(PutORC.REL_SUCCESS).get(0);
+ mockFlowFile.assertAttributeEquals(PutORC.ABSOLUTE_HDFS_PATH_ATTRIBUTE, orcFile.getParent().toString());
+ mockFlowFile.assertAttributeEquals(CoreAttributes.FILENAME.key(), filename);
+ mockFlowFile.assertAttributeEquals(PutORC.RECORD_COUNT_ATTR, "100");
+ mockFlowFile.assertAttributeEquals(PutORC.HIVE_DDL_ATTRIBUTE,
+ "CREATE EXTERNAL TABLE IF NOT EXISTS myTable (name STRING, favorite_number INT, favorite_color STRING, scale DOUBLE) STORED AS ORC");
+
+ // verify we generated a provenance event
+ final List<ProvenanceEventRecord> provEvents = testRunner.getProvenanceEvents();
+ assertEquals(1, provEvents.size());
+
+ // verify it was a SEND event with the correct URI
+ final ProvenanceEventRecord provEvent = provEvents.get(0);
+ assertEquals(ProvenanceEventType.SEND, provEvent.getEventType());
+ // Against a real HDFS the transit URI would start with "hdfs://"; with a local filesystem, just assert the path suffix.
+ Assert.assertTrue(provEvent.getTransitUri().endsWith(DIRECTORY + "/" + filename));
+
+ // verify the content of the ORC file by reading it back in
+ verifyORCUsers(orcFile, 100);
+
+ // verify we don't have the temp dot file after success
+ final File tempOrcFile = new File(DIRECTORY + "/." + filename);
+ Assert.assertFalse(tempOrcFile.exists());
+
+ // verify we DO have the CRC file after success
+ final File crcAvroORCFile = new File(DIRECTORY + "/." + filename + ".crc");
+ Assert.assertTrue(crcAvroORCFile.exists());
+ }
+
+ @Test
+ public void testWriteORCWithAvroLogicalTypes() throws IOException, InitializationException {
+ final String avroSchema = IOUtils.toString(new FileInputStream("src/test/resources/user_logical_types.avsc"), StandardCharsets.UTF_8);
+ schema = new Schema.Parser().parse(avroSchema);
+ Calendar now = Calendar.getInstance();
+ LocalTime nowTime = LocalTime.now();
+ LocalDateTime nowDateTime = LocalDateTime.now();
+ LocalDate epoch = LocalDate.ofEpochDay(0);
+ LocalDate nowDate = LocalDate.now();
+
+ final int timeMillis = nowTime.get(ChronoField.MILLI_OF_DAY);
+ final Timestamp timestampMillis = Timestamp.valueOf(nowDateTime);
+ final Date dt = Date.valueOf(nowDate);
+ final double dec = 1234.56;
+
+ configure(proc, 10, (numUsers, readerFactory) -> {
+ for (int i = 0; i < numUsers; i++) {
+ readerFactory.addRecord(
+ i,
+ timeMillis,
+ timestampMillis,
+ dt,
+ dec);
+ }
+ return null;
+ });
+
+ final String filename = "testORCWithDefaults-" + System.currentTimeMillis();
+
+ final Map<String, String> flowFileAttributes = new HashMap<>();
+ flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
+
+ testRunner.setProperty(PutORC.HIVE_TABLE_NAME, "myTable");
+
+ testRunner.enqueue("trigger", flowFileAttributes);
+ testRunner.run();
+ testRunner.assertAllFlowFilesTransferred(PutORC.REL_SUCCESS, 1);
+
+ final Path orcFile = new Path(DIRECTORY + "/" + filename);
+
+ // verify the successful flow file has the expected attributes
+ final MockFlowFile mockFlowFile = testRunner.getFlowFilesForRelationship(PutORC.REL_SUCCESS).get(0);
+ mockFlowFile.assertAttributeEquals(PutORC.ABSOLUTE_HDFS_PATH_ATTRIBUTE, orcFile.getParent().toString());
+ mockFlowFile.assertAttributeEquals(CoreAttributes.FILENAME.key(), filename);
+ mockFlowFile.assertAttributeEquals(PutORC.RECORD_COUNT_ATTR, "10");
+ // DDL will be created with field names normalized (lowercased, e.g.) for Hive by default
+ mockFlowFile.assertAttributeEquals(PutORC.HIVE_DDL_ATTRIBUTE,
+ "CREATE EXTERNAL TABLE IF NOT EXISTS myTable (id INT, timemillis INT, timestampmillis TIMESTAMP, dt DATE, dec DOUBLE) STORED AS ORC");
+
+ // verify we generated a provenance event
+ final List<ProvenanceEventRecord> provEvents = testRunner.getProvenanceEvents();
+ assertEquals(1, provEvents.size());
+
+ // verify it was a SEND event with the correct URI
+ final ProvenanceEventRecord provEvent = provEvents.get(0);
+ assertEquals(ProvenanceEventType.SEND, provEvent.getEventType());
+ // Against a real HDFS the transit URI would start with "hdfs://"; with a local filesystem, just assert the path suffix.
+ Assert.assertTrue(provEvent.getTransitUri().endsWith(DIRECTORY + "/" + filename));
+
+ // verify the content of the ORC file by reading it back in
+ verifyORCUsers(orcFile, 10, (x, currUser) -> {
+ assertEquals((int) currUser, ((IntWritable) x.get(0)).get());
+ assertEquals(timeMillis, ((IntWritable) x.get(1)).get());
+ assertEquals(timestampMillis, ((TimestampWritable) x.get(2)).getTimestamp());
+ assertEquals(dt, ((DateWritable) x.get(3)).get());
+ assertEquals(dec, ((DoubleWritable) x.get(4)).get(), Double.MIN_VALUE);
+ return null;
+ }
+ );
+
+ // verify we don't have the temp dot file after success
+ final File tempOrcFile = new File(DIRECTORY + "/." + filename);
+ Assert.assertFalse(tempOrcFile.exists());
+
+ // verify we DO have the CRC file after success
+ final File crcAvroORCFile = new File(DIRECTORY + "/." + filename + ".crc");
+ Assert.assertTrue(crcAvroORCFile.exists());
+ }
+
+ @Test
+ public void testValidSchemaWithELShouldBeSuccessful() throws InitializationException {
+ configure(proc, 10);
+
+ final String filename = "testValidSchemaWithELShouldBeSuccessful-" + System.currentTimeMillis();
+
+ // provide the schema text in the my.schema attribute so it can be referenced through Expression Language
+ final Map<String, String> flowFileAttributes = new HashMap<>();
+ flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
+ flowFileAttributes.put("my.schema", schema.toString());
+
+ testRunner.enqueue("trigger", flowFileAttributes);
+ testRunner.run();
+ testRunner.assertAllFlowFilesTransferred(PutORC.REL_SUCCESS, 1);
+ }
+
+ @Test
+ public void testMalformedRecordExceptionFromReaderShouldRouteToFailure() throws InitializationException, IOException, MalformedRecordException, SchemaNotFoundException {
+ configure(proc, 10);
+
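+ // Replace the configured reader factory with one whose reader always throws
+ // MalformedRecordException, so the flow file should be routed to failure.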
+ final org.apache.nifi.serialization.RecordReader recordReader = Mockito.mock(org.apache.nifi.serialization.RecordReader.class);
+ when(recordReader.nextRecord()).thenThrow(new MalformedRecordException("ERROR"));
+
+ final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
+ when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
+ when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenReturn(recordReader);
+
+ testRunner.addControllerService("mock-reader-factory", readerFactory);
+ testRunner.enableControllerService(readerFactory);
+ testRunner.setProperty(PutORC.RECORD_READER, "mock-reader-factory");
+
+ final String filename = "testMalformedRecordExceptionShouldRouteToFailure-" + System.currentTimeMillis();
+
+ final Map<String, String> flowFileAttributes = new HashMap<>();
+ flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
+
+ testRunner.enqueue("trigger", flowFileAttributes);
+ testRunner.run();
+ testRunner.assertAllFlowFilesTransferred(PutORC.REL_FAILURE, 1);
+ }
+
+ @Test
+ public void testIOExceptionCreatingWriterShouldRouteToRetry() throws InitializationException {
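+ // Subclass PutORC so creating the HDFS record writer always throws an
+ // IOException, simulating a failure that should route the flow file to retry.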
+ final PutORC proc = new PutORC() {
+ @Override
+ public HDFSRecordWriter createHDFSRecordWriter(ProcessContext context, FlowFile flowFile, Configuration conf, Path path, RecordSchema schema)
+ throws IOException {
+ throw new IOException("IOException");
+ }
+ };
+ configure(proc, 0);
+
+ final String filename = "testMalformedRecordExceptionShouldRouteToFailure-" + System.currentTimeMillis();
+
+ final Map<String, String> flowFileAttributes = new HashMap<>();
+ flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
+
+ testRunner.enqueue("trigger", flowFileAttributes);
+ testRunner.run();
+ testRunner.assertAllFlowFilesTransferred(PutORC.REL_RETRY, 1);
+ }
+
+ @Test
+ public void testIOExceptionFromReaderShouldRouteToRetry() throws InitializationException, IOException, MalformedRecordException, SchemaNotFoundException {
+ configure(proc, 10);
+
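+ // Mock a RecordSet whose next() always throws IOException, so the error
+ // occurs while reading records rather than while creating the writer.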
+ final RecordSet recordSet = Mockito.mock(RecordSet.class);
+ when(recordSet.next()).thenThrow(new IOException("ERROR"));
+
+ final org.apache.nifi.serialization.RecordReader recordReader = Mockito.mock(org.apache.nifi.serialization.RecordReader.class);
+ when(recordReader.createRecordSet()).thenReturn(recordSet);
+ when(recordReader.getSchema()).thenReturn(AvroTypeUtil.createSchema(schema));
+
+ final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
+ when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
+ when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenReturn(recordReader);
+
+ testRunner.addControllerService("mock-reader-factory", readerFactory);
+ testRunner.enableControllerService(readerFactory);
+ testRunner.setProperty(PutORC.RECORD_READER, "mock-reader-factory");
+
+ final String filename = "testMalformedRecordExceptionShouldRouteToFailure-" + System.currentTimeMillis();
+
+ final Map<String, String> flowFileAttributes = new HashMap<>();
+ flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
+
+ testRunner.enqueue("trigger", flowFileAttributes);
+ testRunner.run();
+ testRunner.assertAllFlowFilesTransferred(PutORC.REL_RETRY, 1);
+ }
+
+ @Test
+ public void testIOExceptionRenamingShouldRouteToRetry() throws InitializationException {
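+ // Subclass PutORC so the rename of the temp dot file to its final name
+ // fails, which should route the flow file to retry.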
+ final PutORC proc = new PutORC() {
+ @Override
+ protected void rename(FileSystem fileSystem, Path srcFile, Path destFile)
+ throws IOException, InterruptedException, FailureException {
+ throw new IOException("IOException renaming");
+ }
+ };
+
+ configure(proc, 10);
+
+ final String filename = "testIOExceptionRenamingShouldRouteToRetry-" + System.currentTimeMillis();
+
+ final Map<String, String> flowFileAttributes = new HashMap<>();
+ flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
+
+ testRunner.enqueue("trigger", flowFileAttributes);
+ testRunner.run();
+ testRunner.assertAllFlowFilesTransferred(PutORC.REL_RETRY, 1);
+
+ // verify the temp dot file was cleaned up even though the flow file was routed to retry
+ final File tempAvroORCFile = new File(DIRECTORY + "/." + filename);
+ Assert.assertFalse(tempAvroORCFile.exists());
+ }
+
+ private void verifyORCUsers(final Path orcUsers, final int numExpectedUsers) throws IOException {
+ verifyORCUsers(orcUsers, numExpectedUsers, null);
+ }
+
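+ /**
+ * Reads the ORC file back with Hive's ORC Reader and asserts each row, using
+ * either the default name/number/color/scale checks or the supplied function.
+ */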
+ private void verifyORCUsers(final Path orcUsers, final int numExpectedUsers, BiFunction<List<Object>, Integer, Void> assertFunction) throws IOException {
+ Reader reader = OrcFile.createReader(orcUsers, OrcFile.readerOptions(testConf));
+ RecordReader recordReader = reader.rows();
+
+ TypeInfo typeInfo =
+ TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,favorite_number:int,favorite_color:string,scale:double>");
+ StructObjectInspector inspector = (StructObjectInspector)
+ OrcStruct.createObjectInspector(typeInfo);
+
+ int currUser = 0;
+ Object nextRecord = null;
+ while ((nextRecord = recordReader.next(nextRecord)) != null) {
+ Assert.assertNotNull(nextRecord);
+ Assert.assertTrue("Not an OrcStruct", nextRecord instanceof OrcStruct);
+ List<Object> x = inspector.getStructFieldsDataAsList(nextRecord);
+
+ if (assertFunction == null) {
+ assertEquals("name" + currUser, x.get(0).toString());
+ assertEquals(currUser, ((IntWritable) x.get(1)).get());
+ assertEquals("blue" + currUser, x.get(2).toString());
+ assertEquals(10.0 * currUser, ((DoubleWritable) x.get(3)).get(), Double.MIN_VALUE);
+ } else {
+ assertFunction.apply(x, currUser);
+ }
+ currUser++;
+ }
+
+ assertEquals(numExpectedUsers, currUser);
+ }
+
+}
\ No newline at end of file