You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by jo...@apache.org on 2017/07/21 21:16:42 UTC

nifi git commit: NIFI-4087 This closes #2026. Fix to allow exclusion of the filename from Tika detection criteria.

Repository: nifi
Updated Branches:
  refs/heads/master 695e8aa98 -> 3371e915c


NIFI-4087 This closes #2026. Fix to allow exclusion of the filename from Tika detection criteria.


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/3371e915
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/3371e915
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/3371e915

Branch: refs/heads/master
Commit: 3371e915ccf29f6d7a240dd52ea11cc10cf8bc5c
Parents: 695e8aa
Author: Leah Anderson <an...@gmail.com>
Authored: Thu Jul 20 19:20:54 2017 -0400
Committer: joewitt <jo...@apache.org>
Committed: Fri Jul 21 16:56:56 2017 -0400

----------------------------------------------------------------------
 .../processors/standard/IdentifyMimeType.java   | 27 ++++++++++++++++++--
 .../standard/TestIdentifyMimeType.java          | 17 ++++++++++++
 .../resources/TestIdentifyMimeType/fake.csv     | 20 +++++++++++++++
 3 files changed, 62 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/3371e915/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java
index 8c2bdf9..c259e88 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java
@@ -19,8 +19,10 @@ package org.apache.nifi.processors.standard;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicReference;
 
@@ -32,6 +34,7 @@ import org.apache.nifi.annotation.behavior.SupportsBatching;
 import org.apache.nifi.annotation.behavior.WritesAttribute;
 import org.apache.nifi.annotation.documentation.CapabilityDescription;
 import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.flowfile.FlowFile;
 import org.apache.nifi.flowfile.attributes.CoreAttributes;
 import org.apache.nifi.logging.ComponentLog;
@@ -78,12 +81,22 @@ import org.apache.tika.mime.MimeTypeException;
         + "If unable to detect the MIME Type, the attribute's value will be set to application/octet-stream")
 public class IdentifyMimeType extends AbstractProcessor {
 
+    public static final PropertyDescriptor USE_FILENAME_IN_DETECTION = new PropertyDescriptor.Builder()
+           .displayName("Use Filename In Detection")
+           .name("use-filename-in-detection")
+           .description("If true will pass the filename to Tika to aid in detection.")
+           .required(true)
+           .allowableValues("true", "false")
+           .defaultValue("true")
+           .build();
+
     public static final Relationship REL_SUCCESS = new Relationship.Builder()
             .name("success")
             .description("All FlowFiles are routed to success")
             .build();
 
     private Set<Relationship> relationships;
+    private List<PropertyDescriptor> properties;
 
     private final TikaConfig config;
     private final Detector detector;
@@ -96,6 +109,11 @@ public class IdentifyMimeType extends AbstractProcessor {
 
     @Override
     protected void init(final ProcessorInitializationContext context) {
+
+        final List<PropertyDescriptor> properties = new ArrayList<>();
+        properties.add(USE_FILENAME_IN_DETECTION);
+        this.properties = Collections.unmodifiableList(properties);
+
         final Set<Relationship> rels = new HashSet<>();
         rels.add(REL_SUCCESS);
         this.relationships = Collections.unmodifiableSet(rels);
@@ -107,6 +125,11 @@ public class IdentifyMimeType extends AbstractProcessor {
     }
 
     @Override
+    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return properties;
+    }
+
+    @Override
     public void onTrigger(final ProcessContext context, final ProcessSession session) {
         FlowFile flowFile = session.get();
         if (flowFile == null) {
@@ -123,8 +146,8 @@ public class IdentifyMimeType extends AbstractProcessor {
                 try (final InputStream in = new BufferedInputStream(stream)) {
                     TikaInputStream tikaStream = TikaInputStream.get(in);
                     Metadata metadata = new Metadata();
-                    // Add filename if it exists
-                    if (filename != null) {
+
+                    if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) {
                         metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                     }
                     // Get mime type

http://git-wip-us.apache.org/repos/asf/nifi/blob/3371e915/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java
index 3465b89..dc61113 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestIdentifyMimeType.java
@@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals;
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.file.Paths;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -72,6 +73,7 @@ public class TestIdentifyMimeType {
         expectedMimeTypes.put("1.xml", "application/xml");
         expectedMimeTypes.put("flowfilev3", "application/flowfile-v3");
         expectedMimeTypes.put("flowfilev1.tar", "application/flowfile-v1");
+        expectedMimeTypes.put("fake.csv", "text/csv");
 
         final Map<String, String> expectedExtensions = new HashMap<>();
         expectedExtensions.put("1.7z", ".7z");
@@ -91,6 +93,7 @@ public class TestIdentifyMimeType {
         expectedExtensions.put("1.xml", ".xml");
         expectedExtensions.put("flowfilev3", "");
         expectedExtensions.put("flowfilev1.tar", "");
+        expectedExtensions.put("fake.csv", ".csv");
 
         final List<MockFlowFile> filesOut = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS);
         for (final MockFlowFile file : filesOut) {
@@ -105,4 +108,18 @@ public class TestIdentifyMimeType {
             assertEquals("Expected " + file + " to have extension " + expectedExtension + ", but it was " + extension, expectedExtension, extension);
         }
     }
+
+    @Test
+    public void testIgnoreFileName() throws Exception {
+        final TestRunner runner = TestRunners.newTestRunner(new IdentifyMimeType());
+        runner.setProperty(IdentifyMimeType.USE_FILENAME_IN_DETECTION, "false");
+
+        runner.enqueue(Paths.get("src/test/resources/TestIdentifyMimeType/fake.csv"));
+        runner.run();
+
+        runner.assertAllFlowFilesTransferred(IdentifyMimeType.REL_SUCCESS, 1);
+        MockFlowFile flowFile = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS).get(0);
+        flowFile.assertAttributeEquals("mime.extension", ".txt");
+        flowFile.assertAttributeEquals("mime.type", "text/plain");
+    }
 }

http://git-wip-us.apache.org/repos/asf/nifi/blob/3371e915/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/fake.csv
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/fake.csv b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/fake.csv
new file mode 100644
index 0000000..f8ba006
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/TestIdentifyMimeType/fake.csv
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+this is not a valid CSV file but
+is intended to verify that the updated 
+IdentifyMIMEType works as expected.