You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by jw...@apache.org on 2017/03/17 17:50:05 UTC
nifi git commit: NIFI-2613 Apache POI processor for Excel to CSV
Repository: nifi
Updated Branches:
refs/heads/master 47c6718fe -> d05727b8c
NIFI-2613 Apache POI processor for Excel to CSV
Signed-off-by: James Wing <jv...@gmail.com>
This closes #929.
Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/d05727b8
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/d05727b8
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/d05727b8
Branch: refs/heads/master
Commit: d05727b8c0bc08630a57457e3b39ae2deebbd6d3
Parents: 47c6718
Author: Jeremy Dyer <jd...@gmail.com>
Authored: Wed Aug 24 11:51:46 2016 -0400
Committer: James Wing <jv...@gmail.com>
Committed: Fri Mar 17 10:39:05 2017 -0700
----------------------------------------------------------------------
nifi-assembly/NOTICE | 5 +
nifi-assembly/pom.xml | 5 +
.../nifi-poi-bundle/nifi-poi-nar/pom.xml | 41 ++
.../src/main/resources/META-INF/LICENSE | 209 ++++++++++
.../src/main/resources/META-INF/NOTICE | 35 ++
.../nifi-poi-bundle/nifi-poi-processors/pom.xml | 82 ++++
.../poi/ConvertExcelToCSVProcessor.java | 418 +++++++++++++++++++
.../org.apache.nifi.processor.Processor | 15 +
.../poi/ConvertExcelToCSVProcessorTest.java | 170 ++++++++
.../src/test/resources/CollegeScorecard.xlsx | Bin 0 -> 5473097 bytes
.../src/test/resources/TwoSheets.xlsx | Bin 0 -> 8987 bytes
.../src/test/resources/Unsupported.xls | Bin 0 -> 26112 bytes
nifi-nar-bundles/nifi-poi-bundle/pom.xml | 35 ++
nifi-nar-bundles/pom.xml | 1 +
pom.xml | 6 +
15 files changed, 1022 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-assembly/NOTICE
----------------------------------------------------------------------
diff --git a/nifi-assembly/NOTICE b/nifi-assembly/NOTICE
index 87c7bf5..3821269 100644
--- a/nifi-assembly/NOTICE
+++ b/nifi-assembly/NOTICE
@@ -541,6 +541,11 @@ The following binary components are provided under the Apache Software License v
Apache Kafka
Copyright 2012 The Apache Software Foundation.
+ (ASLv2) Apache POI
+ The following NOTICE information applies:
+ Apache POI
+ Copyright 2012 The Apache Software Foundation.
+
(ASLv2) Yammer Metrics
The following NOTICE information applies:
Metrics
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-assembly/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-assembly/pom.xml b/nifi-assembly/pom.xml
index 3e5104f..f17c3c6 100755
--- a/nifi-assembly/pom.xml
+++ b/nifi-assembly/pom.xml
@@ -210,6 +210,11 @@ language governing permissions and limitations under the License. -->
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-poi-nar</artifactId>
+ <type>nar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
<artifactId>nifi-kite-nar</artifactId>
<type>nar</type>
</dependency>
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
new file mode 100644
index 0000000..244943b
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-poi-bundle</artifactId>
+ <version>1.2.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>nifi-poi-nar</artifactId>
+ <version>1.2.0-SNAPSHOT</version>
+ <packaging>nar</packaging>
+ <properties>
+ <maven.javadoc.skip>true</maven.javadoc.skip>
+ <source.skip>true</source.skip>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-poi-processors</artifactId>
+ <version>1.2.0-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
+
+</project>
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/src/main/resources/META-INF/LICENSE
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/src/main/resources/META-INF/LICENSE b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/src/main/resources/META-INF/LICENSE
new file mode 100644
index 0000000..6effaa8
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/src/main/resources/META-INF/LICENSE
@@ -0,0 +1,209 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+APACHE NIFI SUBCOMPONENTS:
+
+The Apache NiFi project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for the these
+subcomponents is subject to the terms and conditions of the following
+licenses.
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/src/main/resources/META-INF/NOTICE
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/src/main/resources/META-INF/NOTICE b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/src/main/resources/META-INF/NOTICE
new file mode 100644
index 0000000..8289cbb
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/src/main/resources/META-INF/NOTICE
@@ -0,0 +1,35 @@
+nifi-poi-nar
+Copyright 2017 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+===========================================
+Apache Software License v2
+===========================================
+
+The following binary components are provided under the Apache Software License v2
+
+ (ASLv2) Apache POI
+ The following NOTICE information applies:
+
+ This product contains parts that were originally based on software from BEA.
+ Copyright (c) 2000-2003, BEA Systems, <http://www.bea.com/>.
+
+ This product contains W3C XML Schema documents. Copyright 2001-2003 (c)
+ World Wide Web Consortium (Massachusetts Institute of Technology, European
+ Research Consortium for Informatics and Mathematics, Keio University)
+
+ This product contains the Piccolo XML Parser for Java
+ (http://piccolo.sourceforge.net/). Copyright 2002 Yuval Oren.
+
+ This product contains the chunks_parse_cmds.tbl file from the vsdump program.
+ Copyright (C) 2006-2007 Valek Filippov (frob@df.ru)
+
+ This product contains parts of the eID Applet project
+ (http://eid-applet.googlecode.com). Copyright (c) 2009-2014
+ FedICT (federal ICT department of Belgium), e-Contract.be BVBA (https://www.e-contract.be),
+ Bart Hanssens from FedICT
+
+ CurvesAIP is BSD-licensed software (https://github.com/virtuald/curvesapi/)
+ Copyright (c) 2005, Graph Builder
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
new file mode 100644
index 0000000..0fee5ce
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml
@@ -0,0 +1,82 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <properties>
+ <poi.version>3.14</poi.version>
+ </properties>
+
+ <parent>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-poi-bundle</artifactId>
+ <version>1.2.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>nifi-poi-processors</artifactId>
+ <packaging>jar</packaging>
+
+ <dependencies>
+ <!-- https://mvnrepository.com/artifact/xerces/xerces -->
+ <dependency>
+ <groupId>xerces</groupId>
+ <artifactId>xerces</artifactId>
+ <version>2.4.0</version>
+ </dependency>
+
+ <dependency>
+ <groupId>xerces</groupId>
+ <artifactId>xercesImpl</artifactId>
+ <version>2.11.0</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi</artifactId>
+ <version>${poi.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <version>${poi.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-processor-utils</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-mock</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.11</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
new file mode 100644
index 0000000..b881c69
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java
@@ -0,0 +1,418 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.poi;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.io.OutputStreamCallback;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.apache.poi.xssf.model.SharedStringsTable;
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.helpers.XMLReaderFactory;
+
+
+@Tags({"excel", "csv", "poi"})
+@CapabilityDescription("Consumes a Microsoft Excel document and converts each worksheet to csv. Each sheet from the incoming Excel " +
+ "document will generate a new Flowfile that will be output from this processor. Each output Flowfile's contents will be formatted as a csv file " +
+ "where the each row from the excel sheet is output as a newline in the csv file. This processor is currently only capable of processing .xlsx " +
+ "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents. This processor also expects well formatted " +
+ "CSV content and will not escape cell's containing invalid content such as newlines or additional commas.")
+@WritesAttributes({@WritesAttribute(attribute="sheetname", description="The name of the Excel sheet that this particular row of data came from in the Excel document"),
+ @WritesAttribute(attribute="numrows", description="The number of rows in this Excel Sheet"),
+ @WritesAttribute(attribute="sourcefilename", description="The name of the Excel document file that this data originated from"),
+ @WritesAttribute(attribute="convertexceltocsvprocessor.error", description="Error message that was encountered on a per Excel sheet basis. This attribute is" +
+ " only populated if an error was occured while processing the particular sheet. Having the error present at the sheet level will allow for the end" +
+ " user to better understand what syntax errors in their excel doc on a larger scale caused the error.")})
+public class ConvertExcelToCSVProcessor
+ extends AbstractProcessor {
+
+ private static final String CSV_MIME_TYPE = "text/csv";
+ public static final String SHEET_NAME = "sheetname";
+ public static final String ROW_NUM = "numrows";
+ public static final String SOURCE_FILE_NAME = "sourcefilename";
+ private static final String SAX_CELL_REF = "c";
+ private static final String SAX_CELL_TYPE = "t";
+ private static final String SAX_CELL_STRING = "s";
+ private static final String SAX_CELL_CONTENT_REF = "v";
+ private static final String SAX_ROW_REF = "row";
+ private static final String SAX_SHEET_NAME_REF = "sheetPr";
+ private static final String DESIRED_SHEETS_DELIMITER = ",";
+ private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";
+ private static final String SAX_PARSER = "org.apache.xerces.parsers.SAXParser";
+
+ public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor
+ .Builder().name("extract-sheets")
+ .displayName("Sheets to Extract")
+ .description("Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property" +
+ " is left blank then all of the sheets will be extracted from the Excel document. The list of names is case in-sensitive. Any sheets not " +
+ "specified in this value will be ignored.")
+ .required(false)
+ .expressionLanguageSupported(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final Relationship ORIGINAL = new Relationship.Builder()
+ .name("original")
+ .description("Original Excel document received by this processor")
+ .build();
+
+ public static final Relationship SUCCESS = new Relationship.Builder()
+ .name("success")
+ .description("Excel data converted to csv")
+ .build();
+
+ public static final Relationship FAILURE = new Relationship.Builder()
+ .name("failure")
+ .description("Failed to parse the Excel document")
+ .build();
+
+ private List<PropertyDescriptor> descriptors;
+
+ private Set<Relationship> relationships;
+
+ @Override
+ protected void init(final ProcessorInitializationContext context) {
+ final List<PropertyDescriptor> descriptors = new ArrayList<>();
+ descriptors.add(DESIRED_SHEETS);
+ this.descriptors = Collections.unmodifiableList(descriptors);
+
+ final Set<Relationship> relationships = new HashSet<>();
+ relationships.add(ORIGINAL);
+ relationships.add(SUCCESS);
+ relationships.add(FAILURE);
+ this.relationships = Collections.unmodifiableSet(relationships);
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return this.relationships;
+ }
+
+ @Override
+ public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return descriptors;
+ }
+
+ @Override
+ public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
+ final FlowFile flowFile = session.get();
+ if ( flowFile == null ) {
+ return;
+ }
+
+ try {
+
+ session.read(flowFile, new InputStreamCallback() {
+ @Override
+ public void process(InputStream inputStream) throws IOException {
+
+ try {
+ String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS)
+ .evaluateAttributeExpressions().getValue();
+
+ OPCPackage pkg = OPCPackage.open(inputStream);
+ XSSFReader r = new XSSFReader(pkg);
+ SharedStringsTable sst = r.getSharedStringsTable();
+ XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData();
+
+ if (desiredSheetsDelimited != null) {
+
+ String[] desiredSheets = StringUtils
+ .split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER);
+
+ if (desiredSheets != null) {
+
+ while (iter.hasNext()) {
+ InputStream sheet = iter.next();
+ String sheetName = iter.getSheetName();
+
+ for (int i = 0; i < desiredSheets.length; i++) {
+ //If the sheetName is a desired one parse it
+ if (sheetName.equalsIgnoreCase(desiredSheets[i])) {
+ handleExcelSheet(session, flowFile, sst, sheet, sheetName);
+ break;
+ }
+ }
+ }
+ } else {
+ getLogger().debug("Excel document was parsed but no sheets with the specified desired names were found.");
+ }
+
+ } else {
+ //Get all of the sheets in the document.
+ while (iter.hasNext()) {
+ handleExcelSheet(session, flowFile, sst, iter.next(), iter.getSheetName());
+ }
+ }
+ } catch (InvalidFormatException ife) {
+ getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife);
+ throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", ife);
+ } catch (OpenXML4JException e) {
+ getLogger().error("Error occurred while processing Excel document metadata", e);
+ }
+ }
+ });
+
+ session.transfer(flowFile, ORIGINAL);
+
+ } catch (RuntimeException ex) {
+ getLogger().error("Failed to process incoming Excel document", ex);
+ FlowFile failedFlowFile = session.putAttribute(flowFile,
+ ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
+ session.transfer(failedFlowFile, FAILURE);
+ }
+ }
+
+
+ /**
+ * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile.
+ *
+ * @param session
+ * The NiFi ProcessSession instance for the current invocation.
+ */
+ private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF,
+ SharedStringsTable sst, final InputStream sheetInputStream, String sName) throws IOException {
+
+ FlowFile ff = session.create();
+ try {
+
+ XMLReader parser =
+ XMLReaderFactory.createXMLReader(
+ SAX_PARSER
+ );
+ ExcelSheetRowHandler handler = new ExcelSheetRowHandler(sst);
+ parser.setContentHandler(handler);
+
+ ff = session.write(ff, new OutputStreamCallback() {
+ @Override
+ public void process(OutputStream out) throws IOException {
+ InputSource sheetSource = new InputSource(sheetInputStream);
+ ExcelSheetRowHandler eh = null;
+ try {
+ eh = (ExcelSheetRowHandler) parser.getContentHandler();
+ eh.setFlowFileOutputStream(out);
+ parser.setContentHandler(eh);
+ parser.parse(sheetSource);
+ sheetInputStream.close();
+ } catch (SAXException se) {
+ getLogger().error("Error occurred while processing Excel sheet {}", new Object[]{eh.getSheetName()}, se);
+ }
+ }
+ });
+
+ if (handler.getSheetName().equals(UNKNOWN_SHEET_NAME)) {
+ //Used the named parsed from the handler. This logic is only here because IF the handler does find a value that should take precedence.
+ ff = session.putAttribute(ff, SHEET_NAME, sName);
+ } else {
+ ff = session.putAttribute(ff, SHEET_NAME, handler.getSheetName());
+ sName = handler.getSheetName();
+ }
+
+ ff = session.putAttribute(ff, ROW_NUM, new Long(handler.getRowCount()).toString());
+
+ if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) {
+ ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key()));
+ } else {
+ ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME);
+ }
+
+ //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE
+ ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
+ ff.getAttribute(CoreAttributes.FILENAME.key()), sName));
+ ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
+
+ session.transfer(ff, SUCCESS);
+
+ } catch (SAXException saxE) {
+ getLogger().error("Failed to create instance of SAXParser {}", new Object[]{SAX_PARSER}, saxE);
+ ff = session.putAttribute(ff,
+ ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage());
+ session.transfer(ff, FAILURE);
+ } finally {
+ sheetInputStream.close();
+ }
+ }
+
+
+ /**
+ * Extracts every row from an Excel Sheet and generates a corresponding JSONObject whose key is the Excel CellAddress and value
+ * is the content of that CellAddress converted to a String
+ */
+ private class ExcelSheetRowHandler
+ extends DefaultHandler {
+
+ private SharedStringsTable sst;
+ private String currentContent;
+ private boolean nextIsString;
+ private OutputStream outputStream;
+ private boolean firstColInRow;
+ long rowCount;
+ String sheetName;
+
+ private ExcelSheetRowHandler(SharedStringsTable sst) {
+ this.sst = sst;
+ this.firstColInRow = true;
+ this.rowCount = 0l;
+ this.sheetName = UNKNOWN_SHEET_NAME;
+ }
+
+ public void setFlowFileOutputStream(OutputStream outputStream) {
+ this.outputStream = outputStream;
+ }
+
+ public void startElement(String uri, String localName, String name,
+ Attributes attributes) throws SAXException {
+
+ if (name.equals(SAX_CELL_REF)) {
+ String cellType = attributes.getValue(SAX_CELL_TYPE);
+ if(cellType != null && cellType.equals(SAX_CELL_STRING)) {
+ nextIsString = true;
+ } else {
+ nextIsString = false;
+ }
+ } else if (name.equals(SAX_ROW_REF)) {
+ firstColInRow = true;
+ } else if (name.equals(SAX_SHEET_NAME_REF)) {
+ sheetName = attributes.getValue(0);
+ }
+
+ currentContent = "";
+ }
+
+ public void endElement(String uri, String localName, String name)
+ throws SAXException {
+
+ if (nextIsString) {
+ int idx = Integer.parseInt(currentContent);
+ currentContent = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
+ nextIsString = false;
+ }
+
+ if (name.equals(SAX_CELL_CONTENT_REF)) {
+ if (firstColInRow) {
+ firstColInRow = false;
+ try {
+ outputStream.write(currentContent.getBytes());
+ } catch (IOException e) {
+ getLogger().error("IO error encountered while writing content of parsed cell " +
+ "value from sheet {}", new Object[]{getSheetName()}, e);
+ }
+ } else {
+ try {
+ outputStream.write(("," + currentContent).getBytes());
+ } catch (IOException e) {
+ getLogger().error("IO error encountered while writing content of parsed cell " +
+ "value from sheet {}", new Object[]{getSheetName()}, e);
+ }
+ }
+ }
+
+ if (name.equals(SAX_ROW_REF)) {
+ //If this is the first row and the end of the row element has been encountered then that means no columns were present.
+ if (!firstColInRow) {
+ try {
+ rowCount++;
+ outputStream.write("\n".getBytes());
+ } catch (IOException e) {
+ getLogger().error("IO error encountered while writing new line indicator", e);
+ }
+ }
+ }
+
+ }
+
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ currentContent += new String(ch, start, length);
+ }
+
+ public long getRowCount() {
+ return rowCount;
+ }
+
+ public String getSheetName() {
+ return sheetName;
+ }
+ }
+
+
+ /**
+ * Takes the original input filename and updates it by removing the file extension and replacing it with
+ * the .csv extension.
+ *
+ * @param origFileName
+ * Original filename from the input file.
+ *
+ * @return
+ * The new filename with the .csv extension that should be place in the output flowfile's attributes
+ */
+ private String updateFilenameToCSVExtension(String nifiUUID, String origFileName, String sheetName) {
+
+ StringBuilder stringBuilder = new StringBuilder();
+
+ if (StringUtils.isNotEmpty(origFileName)) {
+ String ext = FilenameUtils.getExtension(origFileName);
+ if (StringUtils.isNotEmpty(ext)) {
+ stringBuilder.append(StringUtils.replace(origFileName, ("." + ext), ""));
+ } else {
+ stringBuilder.append(origFileName);
+ }
+ } else {
+ stringBuilder.append(nifiUUID);
+ }
+
+ stringBuilder.append("_");
+ stringBuilder.append(sheetName);
+ stringBuilder.append(".");
+ stringBuilder.append("csv");
+
+ return stringBuilder.toString();
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
new file mode 100644
index 0000000..43baa0b
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
@@ -0,0 +1,15 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
new file mode 100644
index 0000000..1972fbb
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.poi;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.util.LogMessage;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class ConvertExcelToCSVProcessorTest {
+
+ private TestRunner testRunner;
+
+ @Before
+ public void init() {
+ testRunner = TestRunners.newTestRunner(ConvertExcelToCSVProcessor.class);
+ }
+
+ @Test
+ public void testMultipleSheetsGeneratesMultipleFlowFiles() throws Exception {
+
+ testRunner.enqueue(new File("src/test/resources/TwoSheets.xlsx").toPath());
+ testRunner.run();
+
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+ MockFlowFile ffSheetA = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+ Long rowsSheetA = new Long(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+ assertTrue(rowsSheetA == 4l);
+ assertTrue(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetA"));
+ assertTrue(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME).equals("TwoSheets.xlsx"));
+
+ //Since TestRunner.run() will create a random filename even if the attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is present
+ assertTrue(ffSheetA.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetA.csv"));
+
+ MockFlowFile ffSheetB = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1);
+ Long rowsSheetB = new Long(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+ assertTrue(rowsSheetB == 3l);
+ assertTrue(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetB"));
+ assertTrue(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME).equals("TwoSheets.xlsx"));
+
+ //Since TestRunner.run() will create a random filename even if the attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is present
+ assertTrue(ffSheetB.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetB.csv"));
+
+ }
+
+ /**
+ * Validates that all sheets in the Excel document are exported.
+ *
+ * @throws Exception
+ * Any exception thrown during execution.
+ */
+ @Test
+ public void testProcessAllSheets() throws Exception {
+
+ testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
+ testRunner.run();
+
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+ MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+ Long l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+ assertTrue(l == 7805l);
+
+ testRunner.clearProvenanceEvents();
+ testRunner.clearTransferState();
+
+ testRunner.enqueue(new File("src/test/resources/TwoSheets.xlsx").toPath());
+ testRunner.run();
+
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+ ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+ l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+ assertTrue(l == 4l);
+
+ ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1);
+ l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+ assertTrue(l == 3l);
+ }
+
+ /**
+ * Validates that the manually specified sheet is exported from the Excel document.
+ *
+ * @throws Exception
+ * Any exception thrown during execution.
+ */
+ @Test
+ public void testProcessASpecificSheetThatDoesExist() throws Exception {
+
+ testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "Scorecard");
+ testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
+ testRunner.run();
+
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+
+ MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
+ Long l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
+ assertTrue(l == 7805l);
+ }
+
+ /**
+ * Tests for a syntactically valid Excel XSSF document with a manually specified Excel sheet that does not exist.
+ * In this scenario only the Original relationship should be invoked.
+ *
+ * @throws Exception
+ * Any exception thrown during execution.
+ */
+ @Test
+ public void testNonExistantSpecifiedSheetName() throws Exception {
+
+ testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "NopeIDoNotExist");
+ testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
+ testRunner.run();
+
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0); //We aren't expecting any output to success here because the sheet doesn't exist
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
+ }
+
+ /**
+ * Tests for graceful handling and error messaging of unsupported .XLS files.
+ */
+ @Test
+ public void testHandleUnsupportedXlsFile() throws Exception {
+
+ testRunner.enqueue(new File("src/test/resources/Unsupported.xls").toPath());
+ testRunner.run();
+
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 0);
+ testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 1);
+
+ List<LogMessage> errorMessages = testRunner.getLogger().getErrorMessages();
+ Assert.assertEquals(2, errorMessages.size());
+ String messageText = errorMessages.get(0).getMsg();
+ Assert.assertTrue(messageText.contains("Excel") && messageText.contains("supported"));
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx
new file mode 100644
index 0000000..27fadea
Binary files /dev/null and b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx differ
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx
new file mode 100644
index 0000000..f4977b1
Binary files /dev/null and b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx differ
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls
new file mode 100644
index 0000000..6023329
Binary files /dev/null and b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls differ
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/nifi-poi-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-poi-bundle/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/pom.xml
new file mode 100644
index 0000000..ed2d99e
--- /dev/null
+++ b/nifi-nar-bundles/nifi-poi-bundle/pom.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-nar-bundles</artifactId>
+ <version>1.2.0-SNAPSHOT</version>
+ </parent>
+
+ <groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-poi-bundle</artifactId>
+ <version>1.2.0-SNAPSHOT</version>
+ <packaging>pom</packaging>
+
+ <modules>
+ <module>nifi-poi-processors</module>
+ <module>nifi-poi-nar</module>
+ </modules>
+
+</project>
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/nifi-nar-bundles/pom.xml
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/pom.xml b/nifi-nar-bundles/pom.xml
index 72cba43..d783dca 100755
--- a/nifi-nar-bundles/pom.xml
+++ b/nifi-nar-bundles/pom.xml
@@ -77,6 +77,7 @@
<module>nifi-gcp-bundle</module>
<module>nifi-registry-bundle</module>
<module>nifi-stateful-analysis-bundle</module>
+ <module>nifi-poi-bundle</module>
</modules>
<dependencyManagement>
http://git-wip-us.apache.org/repos/asf/nifi/blob/d05727b8/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 4ded11d..3fe27db 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1002,6 +1002,12 @@ language governing permissions and limitations under the License. -->
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
+ <artifactId>nifi-poi-nar</artifactId>
+ <version>1.2.0-SNAPSHOT</version>
+ <type>nar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.nifi</groupId>
<artifactId>nifi-kite-nar</artifactId>
<version>1.2.0-SNAPSHOT</version>
<type>nar</type>