You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by al...@apache.org on 2019/03/04 14:55:42 UTC

[nifi-minifi-cpp] branch master updated: MINIFICPP-750: Add Sentiment Analyzer

This is an automated email from the ASF dual-hosted git repository.

aldrin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git


The following commit(s) were added to refs/heads/master by this push:
     new 8a01718  MINIFICPP-750: Add Sentiment Analyzer
8a01718 is described below

commit 8a01718bd20464833e08b3723f543ce41e987ecc
Author: Marc Parisi <ph...@apache.org>
AuthorDate: Fri Mar 1 16:46:14 2019 -0500

    MINIFICPP-750: Add Sentiment Analyzer
    
    This closes #494.
    
    Signed-off-by: Aldrin Piri <al...@apache.org>
---
 CMakeLists.txt                                   |  4 +++
 README.md                                        |  1 +
 conf/minifi.properties                           |  3 +-
 extensions/pythonprocessors/SentimentAnalysis.py | 43 ++++++++++++++++++++++++
 extensions/script/ExampleProcessor.py            | 15 +++++++++
 extensions/script/README.md                      | 10 ++++++
 extensions/script/python/PythonCreator.h         | 42 +++++++++++++----------
 main/MiNiFiMain.cpp                              |  4 +--
 PYTHON.md => nanofi/PYTHON.md                    |  0
 9 files changed, 100 insertions(+), 22 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45e49cd..a540c63 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -606,6 +606,10 @@ set(CPACK_SOURCE_IGNORE_FILES "/docs/generated/;${CMAKE_SOURCE_DIR}/build/;~$;${
 install(FILES conf/minifi.properties conf/minifi-log.properties conf/minifi-uid.properties conf/config.yml
         DESTINATION conf
         COMPONENT bin)
+        
+install(DIRECTORY extensions/pythonprocessors/
+        DESTINATION minifi-python
+        COMPONENT bin)
 
 install(PROGRAMS bin/minifi.sh
         DESTINATION bin
diff --git a/README.md b/README.md
index 793dec7..6e69e1a 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,7 @@ Through JNI extensions you can run NiFi processors using NARs. The JNI extension
 | Tensorflow | [TFApplyGraph](PROCESSORS.md#tfapplygraph)<br/>[TFConvertImageToTensor](PROCESSORS.md#tfconvertimagetotensor)<br/>[TFExtractTopLabels](PROCESSORS.md#tfextracttoplabels)<br/>      |    -DENABLE_TENSORFLOW=ON  |
 | USB Camera | [GetUSBCamera](PROCESSORS.md#getusbcamera)     |    -DENABLE_USB_CAMERA=ON  |
 
+ Please see our [Python guide](extensions/script/README.md) on how to write Python processors and use them within MiNiFi C++. 
 
 ## Caveats
 * 0.5.0 represents a GA-release. We follow semver so you can expect API and ABI compatibility within minor releases. See [semver's website](https://semver.org/) for more information
diff --git a/conf/minifi.properties b/conf/minifi.properties
index 705bb16..90bca9a 100644
--- a/conf/minifi.properties
+++ b/conf/minifi.properties
@@ -49,4 +49,5 @@ nifi.nar.directory=${MINIFI_HOME}/minifi-jni/nars
 nifi.nar.deploy.directory=${MINIFI_HOME}/minifi-jni/nardeploy
 nifi.nar.deploy.directory=${MINIFI_HOME}/minifi-jni/nardocs
 # must be comma separated 
-nifi.jvm.options=-Xmx1G
\ No newline at end of file
+nifi.jvm.options=-Xmx1G
+nifi.python.processor.dir=${MINIFI_HOME}/minifi-python/
\ No newline at end of file
diff --git a/extensions/pythonprocessors/SentimentAnalysis.py b/extensions/pythonprocessors/SentimentAnalysis.py
new file mode 100644
index 0000000..f92f8bc
--- /dev/null
+++ b/extensions/pythonprocessors/SentimentAnalysis.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import codecs
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+def describe(processor):
+    processor.setDescription("Provides a sentiment analysis of the content within the flow file")
+
+def onInitialize(processor):
+  processor.setSupportsDynamicProperties()
+
+class VaderSentiment(object):
+  def __init__(self):
+    self.content = None
+
+  def process(self, input_stream):
+    self.content = codecs.getreader('utf-8')(input_stream).read()
+    return len(self.content)
+
+def onTrigger(context, session):
+  flow_file = session.get()
+  if flow_file is not None:
+    sentiment = VaderSentiment()
+    session.read(flow_file,sentiment)
+    analyzer = SentimentIntensityAnalyzer()
+    vs = analyzer.polarity_scores(sentiment.content)
+    flow_file.addAttribute("positive",str(vs['pos']))
+    flow_file.addAttribute("negative",str(vs['neg']))
+    flow_file.addAttribute("neutral",str(vs['neu']))
+    session.transfer(flow_file, REL_SUCCESS)
diff --git a/extensions/script/ExampleProcessor.py b/extensions/script/ExampleProcessor.py
index 96c9b92..6ef7627 100644
--- a/extensions/script/ExampleProcessor.py
+++ b/extensions/script/ExampleProcessor.py
@@ -1,3 +1,18 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 def describe(processor):
     processor.setDescription("Adds an attribute to your flow files")
 
diff --git a/extensions/script/README.md b/extensions/script/README.md
index ad83448..c28e373 100644
--- a/extensions/script/README.md
+++ b/extensions/script/README.md
@@ -59,3 +59,13 @@ To enable python Processor capabilities, the following options need to be provid
 	#directory where processors exist
 	nifi.python.processor.dir=XXXX
 	
+	
+## Processors
+The python directory (extensions/pythonprocessors) contains implementations that will be available for flows if the required dependencies
+exist.
+   
+## Sentiment Analysis
+
+The SentimentAnalysis processor will perform a VADER Sentiment Analysis. This requires that you install nltk and VaderSentiment:
+		pip install nltk
+		pip install VaderSentiment
diff --git a/extensions/script/python/PythonCreator.h b/extensions/script/python/PythonCreator.h
index 54ff155..8ce9b53 100644
--- a/extensions/script/python/PythonCreator.h
+++ b/extensions/script/python/PythonCreator.h
@@ -85,25 +85,30 @@ class PythonCreator : public minifi::core::CoreComponent {
         utils::Identifier uuid;
         auto processor = std::dynamic_pointer_cast<core::Processor>(core::ClassLoader::getDefaultClassLoader().instantiate(scriptName, uuid));
         if (processor) {
-          processor->initialize();
-          auto proc = std::dynamic_pointer_cast<python::processors::ExecutePythonProcessor>(processor);
-          minifi::BundleDetails details;
-          details.artifact = getFileName(path);
-          details.version = minifi::AgentBuild::VERSION;
-          details.group = "python";
-          minifi::ClassDescription description("org.apache.nifi.minifi.processors." + scriptName);
-          description.dynamic_properties_ = proc->getPythonSupportDynamicProperties();
-          auto properties = proc->getPythonProperties();
-
-          minifi::AgentDocs::putDescription(scriptName, proc->getDescription());
-          for (const auto &prop : properties) {
-            description.class_properties_.insert(std::make_pair(prop.getName(), prop));
+          try {
+            processor->initialize();
+            auto proc = std::dynamic_pointer_cast<python::processors::ExecutePythonProcessor>(processor);
+            minifi::BundleDetails details;
+            details.artifact = getFileName(path);
+            details.version = minifi::AgentBuild::VERSION;
+            details.group = "python";
+            minifi::ClassDescription description("org.apache.nifi.minifi.processors." + scriptName);
+            description.dynamic_properties_ = proc->getPythonSupportDynamicProperties();
+            auto properties = proc->getPythonProperties();
+
+            minifi::AgentDocs::putDescription(scriptName, proc->getDescription());
+            for (const auto &prop : properties) {
+              description.class_properties_.insert(std::make_pair(prop.getName(), prop));
+            }
+
+            for (const auto &rel : proc->getSupportedRelationships()) {
+              description.class_relationships_.push_back(rel);
+            }
+            minifi::ExternalBuildDescription::addExternalComponent(details, description);
+          } catch (const std::exception &e) {
+            logger_->log_warn("Cannot load %s because of %s", scriptName, e.what());
           }
 
-          for (const auto &rel : proc->getSupportedRelationships()) {
-            description.class_relationships_.push_back(rel);
-          }
-          minifi::ExternalBuildDescription::addExternalComponent(details, description);
         }
 
       }
@@ -128,7 +133,8 @@ class PythonCreator : public minifi::core::CoreComponent {
   std::vector<std::string> classpaths_;
 
   std::shared_ptr<logging::Logger> logger_;
-};
+}
+;
 
 } /* namespace python */
 } /* namespace minifi */
diff --git a/main/MiNiFiMain.cpp b/main/MiNiFiMain.cpp
index cd0d22e..ba74ace 100644
--- a/main/MiNiFiMain.cpp
+++ b/main/MiNiFiMain.cpp
@@ -289,15 +289,13 @@ int main(int argc, char **argv) {
 	 */
 	if (sem_wait(running) == -1)
 		perror("sem_wait");
-	std::cout << "leaving" << std::endl;
+
 	if (sem_close(running) == -1)
 		perror("sem_close");
 
 	if (sem_unlink("/MiNiFiMain") == -1)
 		perror("sem_unlink");
 
-	std::cout << "leaving" << std::endl;
-
 	/**
 	 * Trigger unload -- wait stop_wait_time
 	 */
diff --git a/PYTHON.md b/nanofi/PYTHON.md
similarity index 100%
rename from PYTHON.md
rename to nanofi/PYTHON.md