You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by al...@apache.org on 2019/03/04 14:55:42 UTC
[nifi-minifi-cpp] branch master updated: MINIFICPP-750: Add Sentiment Analyzer
This is an automated email from the ASF dual-hosted git repository.
aldrin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
The following commit(s) were added to refs/heads/master by this push:
new 8a01718 MINIFICPP-750: Add Sentiment Analyzer
8a01718 is described below
commit 8a01718bd20464833e08b3723f543ce41e987ecc
Author: Marc Parisi <ph...@apache.org>
AuthorDate: Fri Mar 1 16:46:14 2019 -0500
MINIFICPP-750: Add Sentiment Analyzer
This closes #494.
Signed-off-by: Aldrin Piri <al...@apache.org>
---
CMakeLists.txt | 4 +++
README.md | 1 +
conf/minifi.properties | 3 +-
extensions/pythonprocessors/SentimentAnalysis.py | 43 ++++++++++++++++++++++++
extensions/script/ExampleProcessor.py | 15 +++++++++
extensions/script/README.md | 10 ++++++
extensions/script/python/PythonCreator.h | 42 +++++++++++++----------
main/MiNiFiMain.cpp | 4 +--
PYTHON.md => nanofi/PYTHON.md | 0
9 files changed, 100 insertions(+), 22 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45e49cd..a540c63 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -606,6 +606,10 @@ set(CPACK_SOURCE_IGNORE_FILES "/docs/generated/;${CMAKE_SOURCE_DIR}/build/;~$;${
install(FILES conf/minifi.properties conf/minifi-log.properties conf/minifi-uid.properties conf/config.yml
DESTINATION conf
COMPONENT bin)
+
+install(DIRECTORY extensions/pythonprocessors/
+ DESTINATION minifi-python
+ COMPONENT bin)
install(PROGRAMS bin/minifi.sh
DESTINATION bin
diff --git a/README.md b/README.md
index 793dec7..6e69e1a 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,7 @@ Through JNI extensions you can run NiFi processors using NARs. The JNI extension
| Tensorflow | [TFApplyGraph](PROCESSORS.md#tfapplygraph)<br/>[TFConvertImageToTensor](PROCESSORS.md#tfconvertimagetotensor)<br/>[TFExtractTopLabels](PROCESSORS.md#tfextracttoplabels)<br/> | -DENABLE_TENSORFLOW=ON |
| USB Camera | [GetUSBCamera](PROCESSORS.md#getusbcamera) | -DENABLE_USB_CAMERA=ON |
+ Please see our [Python guide](extensions/script/README.md) on how to write Python processors and use them within MiNiFi C++.
## Caveats
* 0.5.0 represents a GA-release. We follow semver so you can expect API and ABI compatibility within minor releases. See [semver's website](https://semver.org/) for more information
diff --git a/conf/minifi.properties b/conf/minifi.properties
index 705bb16..90bca9a 100644
--- a/conf/minifi.properties
+++ b/conf/minifi.properties
@@ -49,4 +49,5 @@ nifi.nar.directory=${MINIFI_HOME}/minifi-jni/nars
nifi.nar.deploy.directory=${MINIFI_HOME}/minifi-jni/nardeploy
nifi.nar.deploy.directory=${MINIFI_HOME}/minifi-jni/nardocs
# must be comma separated
-nifi.jvm.options=-Xmx1G
\ No newline at end of file
+nifi.jvm.options=-Xmx1G
+nifi.python.processor.dir=${MINIFI_HOME}/minifi-python/
\ No newline at end of file
diff --git a/extensions/pythonprocessors/SentimentAnalysis.py b/extensions/pythonprocessors/SentimentAnalysis.py
new file mode 100644
index 0000000..f92f8bc
--- /dev/null
+++ b/extensions/pythonprocessors/SentimentAnalysis.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import codecs
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+def describe(processor):
+ processor.setDescription("Provides a sentiment analysis of the content within the flow file")
+
+def onInitialize(processor):
+ processor.setSupportsDynamicProperties()
+
+class VaderSentiment(object):
+ def __init__(self):
+ self.content = None
+
+ def process(self, input_stream):
+ self.content = codecs.getreader('utf-8')(input_stream).read()
+ return len(self.content)
+
+def onTrigger(context, session):
+ flow_file = session.get()
+ if flow_file is not None:
+ sentiment = VaderSentiment()
+ session.read(flow_file,sentiment)
+ analyzer = SentimentIntensityAnalyzer()
+ vs = analyzer.polarity_scores(sentiment.content)
+ flow_file.addAttribute("positive",str(vs['pos']))
+ flow_file.addAttribute("negative",str(vs['neg']))
+ flow_file.addAttribute("neutral",str(vs['neu']))
+ session.transfer(flow_file, REL_SUCCESS)
diff --git a/extensions/script/ExampleProcessor.py b/extensions/script/ExampleProcessor.py
index 96c9b92..6ef7627 100644
--- a/extensions/script/ExampleProcessor.py
+++ b/extensions/script/ExampleProcessor.py
@@ -1,3 +1,18 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
def describe(processor):
processor.setDescription("Adds an attribute to your flow files")
diff --git a/extensions/script/README.md b/extensions/script/README.md
index ad83448..c28e373 100644
--- a/extensions/script/README.md
+++ b/extensions/script/README.md
@@ -59,3 +59,13 @@ To enable python Processor capabilities, the following options need to be provid
#directory where processors exist
nifi.python.processor.dir=XXXX
+
+## Processors
+The python directory (extensions/pythonprocessors) contains implementations that will be available for flows if the required dependencies
+exist.
+
+## Sentiment Analysis
+
+The SentimentAnalysis processor will perform a VADER Sentiment Analysis. This requires that you install nltk and VaderSentiment:
+ pip install nltk
+ pip install VaderSentiment
diff --git a/extensions/script/python/PythonCreator.h b/extensions/script/python/PythonCreator.h
index 54ff155..8ce9b53 100644
--- a/extensions/script/python/PythonCreator.h
+++ b/extensions/script/python/PythonCreator.h
@@ -85,25 +85,30 @@ class PythonCreator : public minifi::core::CoreComponent {
utils::Identifier uuid;
auto processor = std::dynamic_pointer_cast<core::Processor>(core::ClassLoader::getDefaultClassLoader().instantiate(scriptName, uuid));
if (processor) {
- processor->initialize();
- auto proc = std::dynamic_pointer_cast<python::processors::ExecutePythonProcessor>(processor);
- minifi::BundleDetails details;
- details.artifact = getFileName(path);
- details.version = minifi::AgentBuild::VERSION;
- details.group = "python";
- minifi::ClassDescription description("org.apache.nifi.minifi.processors." + scriptName);
- description.dynamic_properties_ = proc->getPythonSupportDynamicProperties();
- auto properties = proc->getPythonProperties();
-
- minifi::AgentDocs::putDescription(scriptName, proc->getDescription());
- for (const auto &prop : properties) {
- description.class_properties_.insert(std::make_pair(prop.getName(), prop));
+ try {
+ processor->initialize();
+ auto proc = std::dynamic_pointer_cast<python::processors::ExecutePythonProcessor>(processor);
+ minifi::BundleDetails details;
+ details.artifact = getFileName(path);
+ details.version = minifi::AgentBuild::VERSION;
+ details.group = "python";
+ minifi::ClassDescription description("org.apache.nifi.minifi.processors." + scriptName);
+ description.dynamic_properties_ = proc->getPythonSupportDynamicProperties();
+ auto properties = proc->getPythonProperties();
+
+ minifi::AgentDocs::putDescription(scriptName, proc->getDescription());
+ for (const auto &prop : properties) {
+ description.class_properties_.insert(std::make_pair(prop.getName(), prop));
+ }
+
+ for (const auto &rel : proc->getSupportedRelationships()) {
+ description.class_relationships_.push_back(rel);
+ }
+ minifi::ExternalBuildDescription::addExternalComponent(details, description);
+ } catch (const std::exception &e) {
+ logger_->log_warn("Cannot load %s because of %s", scriptName, e.what());
}
- for (const auto &rel : proc->getSupportedRelationships()) {
- description.class_relationships_.push_back(rel);
- }
- minifi::ExternalBuildDescription::addExternalComponent(details, description);
}
}
@@ -128,7 +133,8 @@ class PythonCreator : public minifi::core::CoreComponent {
std::vector<std::string> classpaths_;
std::shared_ptr<logging::Logger> logger_;
-};
+}
+;
} /* namespace python */
} /* namespace minifi */
diff --git a/main/MiNiFiMain.cpp b/main/MiNiFiMain.cpp
index cd0d22e..ba74ace 100644
--- a/main/MiNiFiMain.cpp
+++ b/main/MiNiFiMain.cpp
@@ -289,15 +289,13 @@ int main(int argc, char **argv) {
*/
if (sem_wait(running) == -1)
perror("sem_wait");
- std::cout << "leaving" << std::endl;
+
if (sem_close(running) == -1)
perror("sem_close");
if (sem_unlink("/MiNiFiMain") == -1)
perror("sem_unlink");
- std::cout << "leaving" << std::endl;
-
/**
* Trigger unload -- wait stop_wait_time
*/
diff --git a/PYTHON.md b/nanofi/PYTHON.md
similarity index 100%
rename from PYTHON.md
rename to nanofi/PYTHON.md