You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2017/06/27 01:45:29 UTC
[2/5] opennlp git commit: Adding sentiment analysis code to OpenNLP:
OPENNLP-840
Adding sentiment analysis code to OpenNLP: OPENNLP-840
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/56321aab
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/56321aab
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/56321aab
Branch: refs/heads/master
Commit: 56321aab51a470cd2004b76fb1f5330881b943c1
Parents: 8d7e1c3
Author: Menshikova <an...@trincoll.edu>
Authored: Thu Jun 15 09:59:19 2017 -0700
Committer: Menshikova <an...@trincoll.edu>
Committed: Wed Jun 21 14:06:41 2017 -0700
----------------------------------------------------------------------
.../sentiment/SentimentCrossValidatorTool.java | 126 ++++++++++
.../SentimentDetailedFMeasureListener.java | 43 ++++
.../SentimentEvaluationErrorListener.java | 65 +++++
.../sentiment/SentimentEvaluatorTool.java | 154 ++++++++++++
.../cmdline/sentiment/SentimentModelLoader.java | 51 ++++
.../cmdline/sentiment/SentimentTrainerTool.java | 115 +++++++++
.../formats/SentimentSampleStreamFactory.java | 83 +++++++
.../java/opennlp/tools/sentiment/Sentiment.java | 30 +++
.../sentiment/SentimentContextGenerator.java | 83 +++++++
.../sentiment/SentimentCrossValidator.java | 240 +++++++++++++++++++
.../sentiment/SentimentEvaluationMonitor.java | 28 +++
.../tools/sentiment/SentimentEvaluator.java | 67 ++++++
.../tools/sentiment/SentimentEventStream.java | 80 +++++++
.../tools/sentiment/SentimentFactory.java | 73 ++++++
.../opennlp/tools/sentiment/SentimentME.java | 163 +++++++++++++
.../opennlp/tools/sentiment/SentimentModel.java | 124 ++++++++++
.../tools/sentiment/SentimentSample.java | 92 +++++++
.../tools/sentiment/SentimentSampleStream.java | 76 ++++++
.../sentiment/SentimentSampleTypeFilter.java | 68 ++++++
.../tools/sentiment/AbstractSentimentTest.java | 77 ++++++
.../sentiment/SentimentCrossValidatorTest.java | 35 +++
.../tools/sentiment/SentimentEvaluatorTest.java | 89 +++++++
.../sentiment/SentimentEventStreamTest.java | 46 ++++
.../tools/sentiment/SentimentMETest.java | 107 +++++++++
.../tools/sentiment/SentimentSampleTest.java | 45 ++++
.../tools/sentiment/en-netflix-sentiment.bin | Bin 0 -> 465780 bytes
.../tools/sentiment/en-stanford-sentiment.bin | Bin 0 -> 664663 bytes
.../opennlp/tools/sentiment/ht-lg-model-raw.bin | Bin 0 -> 347428 bytes
.../tools/sentiment/ht-sentiment-bin.bin | Bin 0 -> 82083 bytes
.../tools/sentiment/ht-sentiment-categ.bin | Bin 0 -> 161961 bytes
.../opennlp/tools/sentiment/sample_train_categ | 100 ++++++++
.../opennlp/tools/sentiment/sample_train_categ2 | 100 ++++++++
32 files changed, 2360 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentCrossValidatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentCrossValidatorTool.java
new file mode 100755
index 0000000..05035a4
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentCrossValidatorTool.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.sentiment;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+import opennlp.tools.cmdline.AbstractCrossValidatorTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.BasicTrainingParams;
+import opennlp.tools.cmdline.params.CVParams;
+import opennlp.tools.cmdline.params.DetailedFMeasureEvaluatorParams;
+import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool.CVToolParams;
+import opennlp.tools.sentiment.SentimentCrossValidator;
+import opennlp.tools.sentiment.SentimentEvaluationMonitor;
+import opennlp.tools.sentiment.SentimentFactory;
+import opennlp.tools.sentiment.SentimentSample;
+import opennlp.tools.util.eval.EvaluationMonitor;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * Command line tool that cross validates the learnable sentiment analyser.
+ */
+public class SentimentCrossValidatorTool
+    extends AbstractCrossValidatorTool<SentimentSample, CVToolParams> {
+
+  /**
+   * Command line parameters accepted by the cross validator.
+   */
+  interface CVToolParams
+      extends BasicTrainingParams, CVParams, DetailedFMeasureEvaluatorParams {
+
+  }
+
+  /**
+   * Binds the tool to sentiment samples and to its parameter interface.
+   */
+  public SentimentCrossValidatorTool() {
+    super(SentimentSample.class, CVToolParams.class);
+  }
+
+  /**
+   * Gives the one-line summary of this tool.
+   *
+   * @return the summary text
+   */
+  public String getShortDescription() {
+    return "K-fold cross validator for the learnable Sentiment Analysis Parser";
+  }
+
+  /**
+   * Cross validates on the sample stream and prints the resulting scores.
+   *
+   * @param format
+   *          the format to be used
+   * @param args
+   *          the arguments
+   */
+  public void run(String format, String[] args) {
+    super.run(format, args);
+
+    mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
+    if (null == mlParams) {
+      mlParams = ModelUtil.createDefaultTrainingParameters();
+    }
+
+    List<EvaluationMonitor<SentimentSample>> monitors = new LinkedList<EvaluationMonitor<SentimentSample>>();
+    if (params.getMisclassified()) {
+      monitors.add(new SentimentEvaluationErrorListener());
+    }
+    // stays null unless params.getDetailedF() is set
+    final SentimentDetailedFMeasureListener detailedF = params.getDetailedF()
+        ? new SentimentDetailedFMeasureListener() : null;
+    if (detailedF != null) {
+      monitors.add(detailedF);
+    }
+
+    SentimentFactory factory = new SentimentFactory();
+
+    SentimentCrossValidator crossValidator;
+    try {
+      crossValidator = new SentimentCrossValidator(params.getLang(), mlParams, factory,
+          monitors.toArray(new SentimentEvaluationMonitor[monitors.size()]));
+      crossValidator.evaluate(sampleStream, params.getFolds());
+    } catch (IOException e) {
+      String reason = "IO error while reading training data or indexing data: "
+          + e.getMessage();
+      throw new TerminateToolException(-1, reason, e);
+    } finally {
+      try {
+        sampleStream.close();
+      } catch (IOException ignored) {
+        // sorry that this can fail
+      }
+    }
+
+    System.out.println("done");
+
+    System.out.println();
+
+    // the detailed listener's report replaces the plain F-measure when present
+    if (detailedF != null) {
+      System.out.println(detailedF.toString());
+    } else {
+      System.out.println(crossValidator.getFMeasure());
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentDetailedFMeasureListener.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentDetailedFMeasureListener.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentDetailedFMeasureListener.java
new file mode 100755
index 0000000..c99fcfc
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentDetailedFMeasureListener.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.sentiment;
+
+import opennlp.tools.cmdline.DetailedFMeasureListener;
+import opennlp.tools.sentiment.SentimentEvaluationMonitor;
+import opennlp.tools.sentiment.SentimentSample;
+import opennlp.tools.util.Span;
+
+/**
+ * Detailed F-measure listener that scores each sample by its sentiment label.
+ */
+public class SentimentDetailedFMeasureListener
+    extends DetailedFMeasureListener<SentimentSample>
+    implements SentimentEvaluationMonitor {
+
+  /**
+   * Wraps the sample in one span over all its tokens, typed with its sentiment.
+   * Never returns null, since DetailedFMeasureListener iterates over the result.
+   * @param sample the sentiment sample to convert
+   * @return a one-element array holding the typed span
+   */
+  @Override
+  protected Span[] asSpanArray(SentimentSample sample) {
+    String[] tokens = sample.getSentence();
+    return new Span[] {new Span(0, tokens == null ? 0 : tokens.length, sample.getSentiment())};
+  }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluationErrorListener.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluationErrorListener.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluationErrorListener.java
new file mode 100755
index 0000000..443eb14
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluationErrorListener.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.sentiment;
+
+import java.io.OutputStream;
+
+import opennlp.tools.cmdline.EvaluationErrorPrinter;
+import opennlp.tools.sentiment.SentimentEvaluationMonitor;
+import opennlp.tools.sentiment.SentimentSample;
+import opennlp.tools.util.eval.EvaluationMonitor;
+
+/**
+ * Evaluation monitor that prints every misclassified sentiment sample.
+ */
+public class SentimentEvaluationErrorListener
+    extends EvaluationErrorPrinter<SentimentSample>
+    implements EvaluationMonitor<SentimentSample>,
+    SentimentEvaluationMonitor {
+
+  /**
+   * Creates a listener that writes its reports to {@code System.err}.
+   */
+  public SentimentEvaluationErrorListener() {
+    super(System.err);
+  }
+
+  /**
+   * Creates a listener that writes its reports to the given stream.
+   */
+  public SentimentEvaluationErrorListener(OutputStream outputStream) {
+    super(outputStream);
+  }
+
+  /**
+   * Prints the reference and predicted sentiment of a misclassified sample.
+   *
+   * @param reference
+   *          the sample carrying the reference sentiment
+   * @param prediction
+   *          the sample carrying the predicted sentiment
+   */
+  @Override
+  public void missclassified(SentimentSample reference,
+      SentimentSample prediction) {
+    String[] expected = { reference.getSentiment() };
+    String[] predicted = { prediction.getSentiment() };
+    printError(expected, predicted, reference, prediction, reference.getSentence());
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluatorTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluatorTool.java
new file mode 100755
index 0000000..4a773ef
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentEvaluatorTool.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.sentiment;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+import opennlp.tools.cmdline.AbstractEvaluatorTool;
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.PerformanceMonitor;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.DetailedFMeasureEvaluatorParams;
+import opennlp.tools.cmdline.params.EvaluatorParams;
+import opennlp.tools.cmdline.sentiment.SentimentEvaluatorTool.EvalToolParams;
+import opennlp.tools.sentiment.SentimentEvaluationMonitor;
+import opennlp.tools.sentiment.SentimentEvaluator;
+import opennlp.tools.sentiment.SentimentME;
+import opennlp.tools.sentiment.SentimentModel;
+import opennlp.tools.sentiment.SentimentSample;
+import opennlp.tools.sentiment.SentimentSampleTypeFilter;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.eval.EvaluationMonitor;
+
+/**
+ * Command line tool that evaluates a sentiment model against reference data.
+ */
+public class SentimentEvaluatorTool
+    extends AbstractEvaluatorTool<SentimentSample, EvalToolParams> {
+
+  /**
+   * Command line parameters accepted by the evaluator.
+   */
+  interface EvalToolParams
+      extends EvaluatorParams, DetailedFMeasureEvaluatorParams {
+    @OptionalParameter
+    @ParameterDescription(valueName = "types", description = "name types to use for evaluation")
+    String getNameTypes();
+  }
+
+  /**
+   * Binds the tool to sentiment samples and to its parameter interface.
+   */
+  public SentimentEvaluatorTool() {
+    super(SentimentSample.class, EvalToolParams.class);
+  }
+
+  /**
+   * Gives the one-line summary of this tool.
+   *
+   * @return the summary text
+   */
+  public String getShortDescription() {
+    return "Measures the performance of the Sentiment model with the reference data";
+  }
+
+  /**
+   * Loads the model, evaluates it on the sample stream and prints the scores.
+   *
+   * @param format
+   *          the format to be used
+   * @param args
+   *          the arguments
+   */
+  public void run(String format, String[] args) {
+    super.run(format, args);
+
+    SentimentModel model = new SentimentModelLoader().load(params.getModel());
+    // TODO: check EvalToolParams --> getNameTypes()
+
+    List<EvaluationMonitor<SentimentSample>> monitors = new LinkedList<EvaluationMonitor<SentimentSample>>();
+    if (params.getMisclassified()) {
+      monitors.add(new SentimentEvaluationErrorListener());
+    }
+    final SentimentDetailedFMeasureListener detailedF = params.getDetailedF()
+        ? new SentimentDetailedFMeasureListener() : null;
+    if (detailedF != null) {
+      monitors.add(detailedF);
+    }
+
+    String typeList = params.getNameTypes();
+    if (typeList != null) {
+      sampleStream = new SentimentSampleTypeFilter(typeList.split(","), sampleStream);
+    }
+
+    SentimentME classifier = new SentimentME(model);
+    SentimentEvaluator evaluator = new SentimentEvaluator(classifier,
+        monitors.toArray(new SentimentEvaluationMonitor[monitors.size()]));
+
+    final PerformanceMonitor throughput = new PerformanceMonitor("sent");
+
+    ObjectStream<SentimentSample> countingStream = new ObjectStream<SentimentSample>() {
+      public SentimentSample read() throws IOException {
+        SentimentSample next = sampleStream.read();
+        if (next == null) {
+          return null;
+        }
+        throughput.incrementCounter();
+        return next;
+      }
+
+      public void reset() throws IOException {
+        sampleStream.reset();
+      }
+
+      public void close() throws IOException {
+        sampleStream.close();
+      }
+    };
+
+    throughput.startAndPrintThroughput();
+
+    try {
+      evaluator.evaluate(countingStream);
+    } catch (IOException e) {
+      System.err.println("failed");
+      String reason = "IO error while reading test data: " + e.getMessage();
+      throw new TerminateToolException(-1, reason, e);
+    } finally {
+      try {
+        countingStream.close();
+      } catch (IOException ignored) {
+        // sorry that this can fail
+      }
+    }
+
+    throughput.stopAndPrintFinalResult();
+
+    System.out.println();
+
+    if (detailedF != null) {
+      System.out.println(detailedF.toString());
+    } else {
+      System.out.println(evaluator.getFMeasure());
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentModelLoader.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentModelLoader.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentModelLoader.java
new file mode 100755
index 0000000..8cf2874
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentModelLoader.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.sentiment;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import opennlp.tools.cmdline.ModelLoader;
+import opennlp.tools.sentiment.SentimentModel;
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Model loader that reads a {@link SentimentModel} from an input stream.
+ */
+public class SentimentModelLoader extends ModelLoader<SentimentModel> {
+
+  /**
+   * Creates the loader, passing "Sentiment" to the base loader.
+   */
+  public SentimentModelLoader() {
+    super("Sentiment");
+  }
+
+  /**
+   * Constructs a sentiment model from the given stream.
+   *
+   * @param modelIn the stream holding the model
+   * @return the model built from the stream
+   */
+  @Override
+  protected SentimentModel loadModel(InputStream modelIn)
+      throws IOException, InvalidFormatException {
+    final SentimentModel loaded = new SentimentModel(modelIn);
+    return loaded;
+  }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentTrainerTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentTrainerTool.java
new file mode 100755
index 0000000..dd6ac44
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentiment/SentimentTrainerTool.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.sentiment;
+
+import java.io.File;
+import java.io.IOException;
+
+import opennlp.tools.cmdline.AbstractTrainerTool;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.TrainingToolParams;
+import opennlp.tools.sentiment.Sentiment;
+import opennlp.tools.sentiment.SentimentFactory;
+import opennlp.tools.sentiment.SentimentME;
+import opennlp.tools.sentiment.SentimentModel;
+import opennlp.tools.sentiment.SentimentSample;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * Command line tool that trains a sentiment analysis model.
+ */
+public class SentimentTrainerTool
+    extends AbstractTrainerTool<SentimentSample, TrainingToolParams> {
+
+  /**
+   * Binds the tool to sentiment samples and to the training parameters.
+   */
+  public SentimentTrainerTool() {
+    super(SentimentSample.class, TrainingToolParams.class);
+  }
+
+  /**
+   * Trains a model on the sample stream and writes it to the model file.
+   *
+   * @param format
+   *          the format to be used
+   * @param args
+   *          the arguments
+   */
+  @Override
+  public void run(String format, String[] args) {
+    super.run(format, args);
+    if (0 == args.length) {
+      // without arguments only the usage text is printed
+      System.out.println(getHelp());
+      return;
+    }
+
+    mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
+    if (null == mlParams) {
+      mlParams = ModelUtil.createDefaultTrainingParameters();
+    }
+
+    File target = params.getModel();
+
+    CmdLineUtil.checkOutputFile("sentiment analysis model", target);
+
+    SentimentModel trained;
+    try {
+      SentimentFactory factory = new SentimentFactory();
+      Sentiment trainer = new SentimentME(params.getLang(), mlParams, factory);
+      trained = trainer.train(sampleStream);
+    } catch (IOException e) {
+      String reason = "IO error while reading training data or indexing data: "
+          + e.getMessage();
+      throw new TerminateToolException(-1, reason, e);
+    } finally {
+      try {
+        sampleStream.close();
+      } catch (IOException ignored) {
+        // sorry that this can fail
+      }
+    }
+
+    CmdLineUtil.writeModel("sentiment analysis", target, trained);
+  }
+
+  /**
+   * Builds the usage line shown to the user.
+   *
+   * @return the usage text
+   */
+  @Override
+  public String getHelp() {
+    String command = CLI.CMD + " " + getName();
+    return "Usage: " + command + " model < documents";
+  }
+
+  /**
+   * Gives the one-line summary of this tool.
+   *
+   * @return the summary text
+   */
+  @Override
+  public String getShortDescription() {
+    return "learnable sentiment analysis";
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/formats/SentimentSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/SentimentSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/SentimentSampleStreamFactory.java
new file mode 100644
index 0000000..3396740
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/SentimentSampleStreamFactory.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import java.io.IOException;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.BasicFormatParams;
+import opennlp.tools.sentiment.SentimentSample;
+import opennlp.tools.sentiment.SentimentSampleStream;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+/**
+ * Stream factory that produces sentiment samples from a data file.
+ */
+public class SentimentSampleStreamFactory
+    extends AbstractSampleStreamFactory<SentimentSample> {
+
+  /**
+   * Creates the factory for the given parameter interface.
+   *
+   * @param params
+   *          the parameter class handed to the base factory
+   */
+  protected <P> SentimentSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  /**
+   * Opens the data file named in the arguments as a stream of sentiment samples.
+   *
+   * @param args
+   *          the arguments, parsed as {@code BasicFormatParams}
+   * @return the sample stream built on top of the line stream
+   */
+  @Override
+  public ObjectStream<SentimentSample> create(String[] args) {
+    BasicFormatParams formatParams = ArgumentParser.parse(args,
+        BasicFormatParams.class);
+
+    CmdLineUtil.checkInputFile("Data", formatParams.getData());
+    InputStreamFactory dataIn = CmdLineUtil
+        .createInputStreamFactory(formatParams.getData());
+    ObjectStream<String> lines = null;
+    try {
+      lines = new PlainTextByLineStream(dataIn, formatParams.getEncoding());
+    } catch (IOException ex) {
+      CmdLineUtil.handleCreateObjectStreamError(ex);
+    }
+
+    return new SentimentSampleStream(lines);
+  }
+
+  /**
+   * Registers this factory under the default format for sentiment samples.
+   */
+  public static void registerFactory() {
+    SentimentSampleStreamFactory factory =
+        new SentimentSampleStreamFactory(BasicFormatParams.class);
+    StreamFactoryRegistry.registerFactory(SentimentSample.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT, factory);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/Sentiment.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/Sentiment.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/Sentiment.java
new file mode 100755
index 0000000..ac219a9
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/Sentiment.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.IOException;
+
+import opennlp.tools.util.ObjectStream;
+/** Sentiment classifier contract: predict from tokens and train a model. */
+public interface Sentiment {
+ /** Returns the prediction for the given tokens (presumably the sentiment label). */
+ String predict(String[] tokens);
+ /** Produces a model from the sample stream; declared to throw {@link IOException}. */
+ SentimentModel train(ObjectStream<SentimentSample> samples)
+ throws IOException;
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentContextGenerator.java
new file mode 100755
index 0000000..4185747
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentContextGenerator.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import opennlp.tools.util.BeamSearchContextGenerator;
+import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
+
+/**
+ * Context generator for sentiment analysis that uses the tokens as features.
+ */
+public class SentimentContextGenerator
+    implements BeamSearchContextGenerator<String> {
+
+  private AdaptiveFeatureGenerator[] featureGenerators;
+
+  public SentimentContextGenerator() {
+    this(new AdaptiveFeatureGenerator[0]);
+  }
+
+  public SentimentContextGenerator(
+      AdaptiveFeatureGenerator[] featureGenerators) {
+    this.featureGenerators = featureGenerators;
+  }
+
+  /**
+   * Uses the text itself as the context.
+   *
+   * @param text
+   *          the tokens to build the context from
+   * @return the same array that was passed in
+   */
+  public String[] getContext(String[] text) {
+    return text;
+  }
+
+  /**
+   * Sequence variant of the context lookup; it always yields no features.
+   *
+   * @param index
+   *          the index of the context
+   * @param sequence
+   *          String sequence given
+   * @param priorDecisions
+   *          decisions given earlier
+   * @param additionalContext
+   *          any additional context
+   * @return a new empty array
+   */
+  @Override
+  public String[] getContext(int index, String[] sequence,
+      String[] priorDecisions, Object[] additionalContext) {
+    return new String[0];
+  }
+
+  public void updateAdaptiveData(String[] tokens, String[] outcomes) {
+    // validates the arrays, then forwards them to every feature generator
+    boolean bothPresent = tokens != null && outcomes != null;
+    if (bothPresent && tokens.length != outcomes.length) {
+      throw new IllegalArgumentException(
+          "The tokens and outcome arrays MUST have the same size!");
+    }
+
+    for (int i = 0; i < featureGenerators.length; i++) {
+      featureGenerators[i].updateAdaptiveData(tokens, outcomes);
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentCrossValidator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentCrossValidator.java
new file mode 100755
index 0000000..19af35a
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentCrossValidator.java
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.eval.CrossValidationPartitioner;
+import opennlp.tools.util.eval.FMeasure;
+
+/**
+ * Runs n-fold cross validation for the sentiment tool: for every fold a
+ * {@link SentimentME} is trained on the training partition, evaluated on the
+ * test partition, and the resulting scores are merged into one
+ * {@link FMeasure}.
+ */
+public class SentimentCrossValidator {
+
+  /**
+   * A run of sentiment samples that are kept together as one document when
+   * the data is partitioned into folds.
+   */
+  private static class DocumentSample {
+
+    private final SentimentSample[] samples;
+
+    /**
+     * @param samples
+     *          the samples that make up the document
+     */
+    DocumentSample(SentimentSample[] samples) {
+      this.samples = samples;
+    }
+
+    /**
+     * Returns the samples of this document.
+     *
+     * @return the samples, in the order they were read
+     */
+    private SentimentSample[] getSamples() {
+      return samples;
+    }
+  }
+
+  /**
+   * Groups sentiment samples into documents. A sample whose clear adaptive
+   * data flag is set starts a new document.
+   */
+  private static class SentimentToDocumentSampleStream
+      extends FilterObjectStream<SentimentSample, DocumentSample> {
+
+    // First sample of the next document; it was already read from the
+    // underlying stream while looking for the end of the previous document.
+    private SentimentSample beginSample;
+
+    /**
+     * @param samples
+     *          the stream of individual sentiment samples to group
+     */
+    protected SentimentToDocumentSampleStream(
+        ObjectStream<SentimentSample> samples) {
+      super(samples);
+    }
+
+    /**
+     * Reads samples up to (but excluding) the next sample that has the clear
+     * adaptive data flag set and returns them as one document.
+     *
+     * @return the next document, or {@code null} if the underlying stream is
+     *         exhausted
+     */
+    @Override
+    public DocumentSample read() throws IOException {
+
+      List<SentimentSample> document = new ArrayList<SentimentSample>();
+
+      if (beginSample == null) {
+        // Assume that the clear flag is set
+        beginSample = samples.read();
+      }
+
+      // Underlying stream is exhausted!
+      if (beginSample == null) {
+        return null;
+      }
+
+      document.add(beginSample);
+
+      SentimentSample sample;
+      while ((sample = samples.read()) != null) {
+
+        if (sample.isClearAdaptiveDataSet()) {
+          // This sample already belongs to the next document; keep it.
+          beginSample = sample;
+          break;
+        }
+
+        document.add(sample);
+      }
+
+      // Underlying stream is exhausted,
+      // next call must return null
+      if (sample == null) {
+        beginSample = null;
+      }
+
+      return new DocumentSample(
+          document.toArray(new SentimentSample[document.size()]));
+    }
+
+    /**
+     * Resets the underlying stream and forgets the buffered first sample of
+     * the next document.
+     */
+    @Override
+    public void reset() throws IOException, UnsupportedOperationException {
+      super.reset();
+      beginSample = null;
+    }
+  }
+
+  /**
+   * Flattens a stream of documents back into a stream of sentiment samples.
+   */
+  private static class DocumentToSentimentSampleStream
+      extends FilterObjectStream<DocumentSample, SentimentSample> {
+
+    // Iterates over the samples of the document that is currently unpacked.
+    private Iterator<SentimentSample> documentSamples = Collections
+        .<SentimentSample>emptyList().iterator();
+
+    /**
+     * @param samples
+     *          the stream of documents to flatten
+     */
+    protected DocumentToSentimentSampleStream(
+        ObjectStream<DocumentSample> samples) {
+      super(samples);
+    }
+
+    /**
+     * Returns the next sentiment sample, moving on to the next document once
+     * the current one has been fully consumed. Documents without samples are
+     * skipped.
+     *
+     * @return the next sentiment sample, or {@code null} if there are no
+     *         more documents
+     */
+    @Override
+    public SentimentSample read() throws IOException {
+
+      // Loop (rather than recurse) so that a long run of empty documents
+      // cannot grow the call stack.
+      while (!documentSamples.hasNext()) {
+        DocumentSample docSample = samples.read();
+
+        if (docSample == null) {
+          return null;
+        }
+
+        documentSamples = Arrays.asList(docSample.getSamples()).iterator();
+      }
+
+      return documentSamples.next();
+    }
+
+    /**
+     * Resets the underlying stream and drops any samples left over from a
+     * partially consumed document, so reading starts cleanly at the first
+     * document again.
+     */
+    @Override
+    public void reset() throws IOException, UnsupportedOperationException {
+      super.reset();
+      documentSamples = Collections.<SentimentSample>emptyList().iterator();
+    }
+  }
+
+  private final String languageCode;
+  private final TrainingParameters params;
+  private final SentimentEvaluationMonitor[] listeners;
+
+  private final SentimentFactory factory;
+  private final FMeasure fmeasure = new FMeasure();
+
+  /**
+   * Creates a cross validator.
+   *
+   * @param lang
+   *          the language code passed on to the trained models
+   * @param params
+   *          the training parameters used for every fold
+   * @param factory
+   *          the sentiment factory used for every fold
+   * @param monitors
+   *          the evaluation monitors handed to each fold's evaluator
+   */
+  public SentimentCrossValidator(String lang, TrainingParameters params,
+      SentimentFactory factory, SentimentEvaluationMonitor[] monitors) {
+
+    this.languageCode = lang;
+    this.factory = factory;
+    this.params = params;
+    this.listeners = monitors;
+  }
+
+  /**
+   * Performs the cross validation and accumulates the scores of all folds.
+   *
+   * @param samples
+   *          stream of SentimentSamples
+   * @param nFolds
+   *          the number of folds to be used in cross validation
+   * @throws IOException
+   *           if reading the samples, training or evaluating fails
+   */
+  public void evaluate(ObjectStream<SentimentSample> samples, int nFolds)
+      throws IOException {
+
+    // Note: The sentiment samples need to be grouped on a document basis.
+
+    CrossValidationPartitioner<DocumentSample> partitioner =
+        new CrossValidationPartitioner<DocumentSample>(
+            new SentimentToDocumentSampleStream(samples), nFolds);
+
+    // One instance is reused; each train() call below replaces its model.
+    Sentiment sentiment = new SentimentME(languageCode, params, factory);
+
+    while (partitioner.hasNext()) {
+
+      CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingSampleStream =
+          partitioner.next();
+
+      sentiment
+          .train(new DocumentToSentimentSampleStream(trainingSampleStream));
+
+      // do testing
+      SentimentEvaluator evaluator = new SentimentEvaluator(sentiment,
+          listeners);
+
+      evaluator.evaluate(new DocumentToSentimentSampleStream(
+          trainingSampleStream.getTestSampleStream()));
+
+      fmeasure.mergeInto(evaluator.getFMeasure());
+    }
+  }
+
+  /**
+   * Returns the F-Measure accumulated over all folds evaluated so far.
+   *
+   * @return the F-Measure
+   */
+  public FMeasure getFMeasure() {
+    return fmeasure;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluationMonitor.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluationMonitor.java
new file mode 100755
index 0000000..ab503f6
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluationMonitor.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import opennlp.tools.util.eval.EvaluationMonitor;
+
+/**
+ * Evaluation monitor for {@link SentimentSample}s. This is a marker
+ * specialization of {@link EvaluationMonitor}: it fixes the sample type and
+ * declares no members of its own.
+ */
+public interface SentimentEvaluationMonitor
+    extends EvaluationMonitor<SentimentSample> {
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluator.java
new file mode 100755
index 0000000..8ece791
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEvaluator.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import opennlp.tools.util.eval.Evaluator;
+import opennlp.tools.util.eval.FMeasure;
+
+/**
+ * Evaluates a {@link Sentiment} implementation against reference samples and
+ * keeps the scores in an {@link FMeasure}.
+ */
+public class SentimentEvaluator extends Evaluator<SentimentSample> {
+
+  private FMeasure fmeasure = new FMeasure();
+
+  private Sentiment sentiment;
+
+  /**
+   * Creates an evaluator.
+   *
+   * @param sentiment
+   *          the sentiment tool whose predictions are evaluated
+   * @param listeners
+   *          the evaluation monitors passed on to the base evaluator
+   */
+  public SentimentEvaluator(Sentiment sentiment,
+      SentimentEvaluationMonitor... listeners) {
+    super(listeners);
+    this.sentiment = sentiment;
+  }
+
+  /**
+   * Predicts the sentiment for the reference sample's sentence, scores the
+   * prediction against the reference label and returns the prediction as a
+   * sample.
+   *
+   * @param reference
+   *          the sample carrying the expected sentiment
+   * @return a new sample holding the predicted sentiment and the reference
+   *         sentence
+   */
+  @Override
+  protected SentimentSample processSample(SentimentSample reference) {
+    String predicted = sentiment.predict(reference.getSentence());
+    String expected = reference.getSentiment();
+
+    // Each sample contributes exactly one reference/prediction pair.
+    String[] references = new String[] { expected };
+    String[] predictions = new String[] { predicted };
+    fmeasure.updateScores(references, predictions);
+
+    return new SentimentSample(predicted, reference.getSentence());
+  }
+
+  /**
+   * Returns the scores collected so far.
+   *
+   * @return the F-Measure
+   */
+  public FMeasure getFMeasure() {
+    return fmeasure;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEventStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEventStream.java
new file mode 100755
index 0000000..8043460
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentEventStream.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.util.Iterator;
+
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.util.AbstractEventStream;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Turns {@link SentimentSample}s into training {@link Event}s: every sample
+ * yields exactly one event whose outcome is the sample's sentiment and whose
+ * context is produced by the context generator from the sample's sentence.
+ */
+public class SentimentEventStream extends AbstractEventStream<SentimentSample> {
+
+  private final SentimentContextGenerator contextGenerator;
+
+  /**
+   * Initializes the event stream.
+   *
+   * @param samples
+   *          the sentiment samples to be used
+   * @param createContextGenerator
+   *          the context generator to be used
+   */
+  public SentimentEventStream(ObjectStream<SentimentSample> samples,
+      SentimentContextGenerator createContextGenerator) {
+    super(samples);
+    contextGenerator = createContextGenerator;
+  }
+
+  /**
+   * Creates the single event for a sample. The event is built lazily, on the
+   * first call to {@code next()}.
+   *
+   * @param sample
+   *          the sentiment sample to be used
+   * @return an iterator over exactly one event; it does not support
+   *         {@code remove()}
+   */
+  @Override
+  protected Iterator<Event> createEvents(final SentimentSample sample) {
+
+    return new Iterator<Event>() {
+
+      // Becomes true once the single event has been handed out.
+      private boolean consumed = false;
+
+      @Override
+      public boolean hasNext() {
+        return !consumed;
+      }
+
+      @Override
+      public Event next() {
+
+        // Honour the Iterator contract instead of producing the same event
+        // again when the caller reads past the end.
+        if (consumed) {
+          throw new java.util.NoSuchElementException(
+              "A sentiment sample produces exactly one event");
+        }
+
+        consumed = true;
+
+        return new Event(sample.getSentiment(),
+            contextGenerator.getContext(sample.getSentence()));
+      }
+
+      @Override
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+    };
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentFactory.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentFactory.java
new file mode 100755
index 0000000..9c284e4
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentFactory.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.BaseToolFactory;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.ext.ExtensionLoader;
+
+/**
+ * Tool factory for sentiment analysis: supplies the context generator and
+ * the tokenizer.
+ */
+public class SentimentFactory extends BaseToolFactory {
+
+  // Manifest property that may name a custom Tokenizer implementation.
+  private static final String TOKENIZER_NAME = "sentiment.tokenizer";
+
+  // Resolved on first use by getTokenizer() and then cached.
+  private Tokenizer tokenizer;
+
+  /**
+   * Intentionally empty: this factory has no artifacts to validate.
+   */
+  @Override
+  public void validateArtifactMap() throws InvalidFormatException {
+    // nothing to validate
+  }
+
+  /**
+   * Creates a fresh context generator.
+   *
+   * @return a new context generator for Sentiment Analysis
+   */
+  public SentimentContextGenerator createContextGenerator() {
+    return new SentimentContextGenerator();
+  }
+
+  /**
+   * Returns the tokenizer, resolving it on the first call. If an artifact
+   * provider is available and its manifest names a tokenizer class, that
+   * class is instantiated; otherwise the whitespace tokenizer is used. The
+   * result is cached for subsequent calls.
+   *
+   * @return the tokenizer
+   */
+  public Tokenizer getTokenizer() {
+    if (tokenizer != null) {
+      return tokenizer;
+    }
+
+    if (artifactProvider != null) {
+      String configuredClass = artifactProvider
+          .getManifestProperty(TOKENIZER_NAME);
+      if (configuredClass != null) {
+        tokenizer = ExtensionLoader.instantiateExtension(Tokenizer.class,
+            configuredClass);
+      }
+    }
+
+    // Nothing could be loaded through the artifact provider: use the default.
+    if (tokenizer == null) {
+      tokenizer = WhitespaceTokenizer.INSTANCE;
+    }
+
+    return tokenizer;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentME.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentME.java
new file mode 100755
index 0000000..38c7ac9
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentME.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.Sequence;
+import opennlp.tools.util.Span;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * Maximum-entropy based sentiment tool. An instance is either created from
+ * an existing {@link SentimentModel} and is ready to predict, or created
+ * from training parameters and becomes ready to predict after
+ * {@link #train(ObjectStream)} has been called.
+ */
+public class SentimentME implements Sentiment {
+
+  public static final int DEFAULT_BEAM_SIZE = 3;
+
+  protected SentimentContextGenerator contextGenerator;
+  // NOTE(review): nothing in this class ever assigns bestSequence, so
+  // probs(Span[]) cannot currently succeed.
+  private Sequence bestSequence;
+  protected SequenceClassificationModel<String> model;
+  private SentimentFactory factory;
+  private MaxentModel maxentModel;
+
+  private String lang;
+  private TrainingParameters params;
+
+  /**
+   * Creates an instance that predicts with an already trained model. Such an
+   * instance has no training parameters and therefore cannot be trained.
+   *
+   * @param sentModel
+   *          sentiment analysis model
+   */
+  public SentimentME(SentimentModel sentModel) {
+    model = sentModel.getSentimentModel();
+    maxentModel = sentModel.getMaxentModel();
+    factory = sentModel.getFactory();
+    contextGenerator = factory.createContextGenerator();
+  }
+
+  /**
+   * Creates an untrained instance; call {@link #train(ObjectStream)} before
+   * predicting.
+   *
+   * @param lang
+   *          the code for the language of the text, e.g. "en"
+   * @param params
+   *          parameters for training
+   * @param factory
+   *          a Sentiment Analysis factory
+   * @throws NullPointerException
+   *           if any argument is null
+   */
+  public SentimentME(String lang, TrainingParameters params,
+      SentimentFactory factory) {
+    this.lang = Objects.requireNonNull(lang, "lang must be provided");
+    this.params = Objects.requireNonNull(params, "params must be provided");
+    this.factory = Objects.requireNonNull(factory, "factory must be provided");
+    contextGenerator = factory.createContextGenerator();
+  }
+
+  /**
+   * Trains a Sentiment Analysis model with the language, parameters and
+   * factory given at construction, and makes this instance predict with the
+   * newly trained model.
+   *
+   * @param samples
+   *          the sentiment samples to be used
+   * @return a Sentiment Analysis model
+   * @throws IOException
+   *           if reading the samples or training fails
+   * @throws IllegalStateException
+   *           if this instance was created from a model and therefore has no
+   *           training parameters
+   */
+  public SentimentModel train(ObjectStream<SentimentSample> samples)
+      throws IOException {
+    if (params == null) {
+      throw new IllegalStateException(
+          "No training parameters available: use the (lang, params, factory) constructor to train");
+    }
+
+    // The same map is given to the trainer as its report map and to the
+    // model as manifest entries, so that whatever the trainer records there
+    // is stored with the model instead of being discarded.
+    Map<String, String> manifestInfoEntries = new HashMap<String, String>();
+    EventTrainer trainer = TrainerFactory.getEventTrainer(params,
+        manifestInfoEntries);
+
+    ObjectStream<Event> eventStream = new SentimentEventStream(samples,
+        contextGenerator);
+    maxentModel = trainer.train(eventStream);
+
+    SentimentModel sentimentModel = new SentimentModel(lang, maxentModel,
+        manifestInfoEntries, factory);
+    model = sentimentModel.getSentimentModel();
+    return sentimentModel;
+  }
+
+  /**
+   * Makes a sentiment prediction
+   *
+   * @param tokens
+   *          the tokens to be analyzed for its sentiment
+   * @return the predicted sentiment
+   * @throws IllegalArgumentException
+   *           if tokens is null or empty
+   * @throws IllegalStateException
+   *           if no model has been loaded or trained yet
+   */
+  @Override
+  public String predict(String[] tokens) {
+    if (tokens == null || tokens.length == 0) {
+      throw new IllegalArgumentException("Tokens must be not empty");
+    }
+    double[] prob = probabilities(tokens);
+    return getBestSentiment(prob);
+  }
+
+  /**
+   * Returns the best chosen sentiment for the text predicted on
+   *
+   * @param outcome
+   *          the outcome probabilities, as returned by
+   *          {@link #probabilities(String[])}
+   * @return the best sentiment
+   * @throws IllegalStateException
+   *           if no model has been loaded or trained yet
+   */
+  public String getBestSentiment(double[] outcome) {
+    return requireMaxentModel().getBestOutcome(outcome);
+  }
+
+  /**
+   * Returns the analysis probabilities
+   *
+   * @param text
+   *          the text to categorize
+   * @return the model's evaluation of the context generated from the text
+   * @throws IllegalStateException
+   *           if no model has been loaded or trained yet
+   */
+  public double[] probabilities(String[] text) {
+    return requireMaxentModel().eval(contextGenerator.getContext(text));
+  }
+
+  /**
+   * Returns an array of probabilities for each of the specified spans which is
+   * the arithmetic mean of the probabilities for each of the outcomes which
+   * make up the span.
+   *
+   * @param spans
+   *          The spans of the sentiments for which probabilities are desired.
+   * @return an array of probabilities for each of the specified spans.
+   * @throws IllegalStateException
+   *           if no best sequence is available
+   */
+  public double[] probs(Span[] spans) {
+    if (bestSequence == null) {
+      throw new IllegalStateException(
+          "No best sequence available to compute span probabilities from");
+    }
+
+    double[] sprobs = new double[spans.length];
+    double[] probs = bestSequence.getProbs();
+
+    for (int si = 0; si < spans.length; si++) {
+      double p = 0;
+
+      for (int oi = spans[si].getStart(); oi < spans[si].getEnd(); oi++) {
+        p += probs[oi];
+      }
+
+      p /= spans[si].length();
+      sprobs[si] = p;
+    }
+
+    return sprobs;
+  }
+
+  /**
+   * Returns the maxent model, failing with a descriptive message when this
+   * instance has neither been created from a model nor trained.
+   */
+  private MaxentModel requireMaxentModel() {
+    if (maxentModel == null) {
+      throw new IllegalStateException(
+          "No model available: load a SentimentModel or call train() first");
+    }
+    return maxentModel;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentModel.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentModel.java
new file mode 100755
index 0000000..924148d
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentModel.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Map;
+import java.util.Properties;
+
+import opennlp.tools.ml.BeamSearch;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.BaseModel;
+
+/**
+ * Model container for sentiment analysis: a {@link BaseModel} whose artifact
+ * map holds the maxent model under the {@code sentiment.model} entry.
+ */
+public class SentimentModel extends BaseModel {
+
+  private static final String COMPONENT_NAME = "SentimentME";
+  private static final String SENTIMENT_MODEL_ENTRY_NAME = "sentiment.model";
+
+  /**
+   * Builds a model from a freshly trained maxent model.
+   *
+   * @param languageCode
+   *          the code for the language of the text, e.g. "en"
+   * @param sentimentModel
+   *          a MaxEnt sentiment model
+   * @param manifestInfoEntries
+   *          additional information in the manifest
+   * @param factory
+   *          a Sentiment Analysis factory
+   */
+  public SentimentModel(String languageCode, MaxentModel sentimentModel,
+      Map<String, String> manifestInfoEntries, SentimentFactory factory) {
+    super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
+    // Register the model first; the artifact map is checked right after.
+    artifactMap.put(SENTIMENT_MODEL_ENTRY_NAME, sentimentModel);
+    checkArtifactMap();
+  }
+
+  /**
+   * Loads a model from a URL.
+   *
+   * @param modelURL
+   *          the URL to a file required for the model
+   */
+  public SentimentModel(URL modelURL)
+      throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelURL);
+  }
+
+  /**
+   * Loads a model from a file.
+   *
+   * @param file
+   *          the file required for the model
+   */
+  public SentimentModel(File file) throws InvalidFormatException, IOException {
+    super(COMPONENT_NAME, file);
+  }
+
+  /**
+   * Loads a model from an input stream.
+   *
+   * @param modelIn
+   *          the stream to read the model from
+   */
+  public SentimentModel(InputStream modelIn)
+      throws InvalidFormatException, IOException {
+    super(COMPONENT_NAME, modelIn);
+  }
+
+  /**
+   * Wraps the stored maxent model in a new beam search. The beam size is
+   * taken from the manifest if it is set there, otherwise
+   * {@link SentimentME#DEFAULT_BEAM_SIZE} is used.
+   *
+   * @return a new beam search over the stored model
+   */
+  public SequenceClassificationModel<String> getSentimentModel() {
+    Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
+    String configuredBeamSize = manifest
+        .getProperty(BeamSearch.BEAM_SIZE_PARAMETER);
+
+    int beamSize = configuredBeamSize == null
+        ? SentimentME.DEFAULT_BEAM_SIZE
+        : Integer.parseInt(configuredBeamSize);
+
+    MaxentModel storedModel = (MaxentModel) artifactMap
+        .get(SENTIMENT_MODEL_ENTRY_NAME);
+    return new BeamSearch<>(beamSize, storedModel);
+  }
+
+  /**
+   * Returns the tool factory of this model as a sentiment factory.
+   *
+   * @return the sentiment factory for the model
+   */
+  public SentimentFactory getFactory() {
+    return (SentimentFactory) this.toolFactory;
+  }
+
+  /**
+   * Returns the maxent model stored in the artifact map.
+   *
+   * @return the MaxEnt model
+   */
+  public MaxentModel getMaxentModel() {
+    Object storedModel = artifactMap.get(SENTIMENT_MODEL_ENTRY_NAME);
+    return (MaxentModel) storedModel;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSample.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSample.java
new file mode 100755
index 0000000..c0c4b20
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSample.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Immutable pairing of a sentiment label with the tokens of a sentence.
+ * Two samples are equal when their sentiment and tokens are equal.
+ */
+public class SentimentSample {
+
+  private final String sentiment;
+  private final List<String> sentence;
+  private final boolean isClearAdaptiveData;
+  // No constructor sets an id, so getId() always returns null.
+  private final String id = null;
+
+  /**
+   * Initializes the current instance with the clear adaptive data flag set.
+   *
+   * @param sentiment
+   *          training sentiment
+   * @param sentence
+   *          training sentence
+   * @throws NullPointerException
+   *           if sentiment or sentence is null
+   */
+  public SentimentSample(String sentiment, String[] sentence) {
+    this(sentiment, sentence, true);
+  }
+
+  /**
+   * Initializes the current instance.
+   *
+   * @param sentiment
+   *          training sentiment
+   * @param sentence
+   *          training sentence; the array is copied, so later changes to it
+   *          do not affect this sample
+   * @param clearAdaptiveData
+   *          the value reported by {@link #isClearAdaptiveDataSet()}
+   * @throws NullPointerException
+   *           if sentiment or sentence is null
+   */
+  public SentimentSample(String sentiment, String[] sentence,
+      boolean clearAdaptiveData) {
+    this.sentiment = Objects.requireNonNull(sentiment,
+        "sentiment must not be null");
+    Objects.requireNonNull(sentence, "sentence must not be null");
+    // Arrays.asList is only a view of the array; clone it first so the
+    // caller cannot change this sample by mutating its own array afterwards.
+    this.sentence = Collections
+        .unmodifiableList(Arrays.asList(sentence.clone()));
+    this.isClearAdaptiveData = clearAdaptiveData;
+  }
+
+  /**
+   * Returns the sentiment
+   *
+   * @return the sentiment
+   */
+  public String getSentiment() {
+    return sentiment;
+  }
+
+  /**
+   * Returns the sentence used
+   *
+   * @return a new array holding the tokens of the sentence
+   */
+  public String[] getSentence() {
+    return sentence.toArray(new String[0]);
+  }
+
+  /**
+   * Returns the id
+   *
+   * @return the id; always null, because no constructor sets one
+   */
+  public String getId() {
+    return id;
+  }
+
+  /**
+   * Returns the value of isClearAdaptiveData
+   *
+   * @return true or false
+   */
+  public boolean isClearAdaptiveDataSet() {
+    return isClearAdaptiveData;
+  }
+
+  /**
+   * Compares sentiment and tokens. The clear adaptive data flag is not part
+   * of the comparison, so that a predicted sample built from only a label
+   * and a sentence can equal its reference sample.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof SentimentSample)) {
+      return false;
+    }
+    SentimentSample other = (SentimentSample) obj;
+    return sentiment.equals(other.sentiment)
+        && sentence.equals(other.sentence);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(sentiment, sentence);
+  }
+
+  /**
+   * Returns the sentiment followed by the tokens, separated by single
+   * spaces.
+   */
+  @Override
+  public String toString() {
+    StringBuilder text = new StringBuilder(sentiment);
+    for (String token : sentence) {
+      text.append(' ').append(token);
+    }
+    return text.toString();
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleStream.java
new file mode 100755
index 0000000..8dac2ee
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleStream.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.IOException;
+
+import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Converts lines of text into {@link SentimentSample}s. Each line is
+ * whitespace tokenized; the first token is the sentiment and the remaining
+ * tokens are the sentence.
+ */
+public class SentimentSampleStream
+    extends FilterObjectStream<String, SentimentSample> {
+
+  /**
+   * Initializes the sample stream.
+   *
+   * @param samples
+   *          the lines to convert, one sample per line
+   */
+  public SentimentSampleStream(ObjectStream<String> samples) {
+    super(samples);
+  }
+
+  /**
+   * Reads the next line and converts it into a sample.
+   *
+   * @return the next sample, or {@code null} if the underlying stream is
+   *         exhausted
+   * @throws IOException
+   *           if the line has fewer than two tokens, or reading fails
+   */
+  @Override
+  public SentimentSample read() throws IOException {
+    String line = samples.read();
+    if (line == null) {
+      return null;
+    }
+
+    // Whitespace tokenize entire string
+    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
+
+    // A usable line needs the sentiment plus at least one sentence token.
+    if (tokens.length <= 1) {
+      throw new IOException(
+          "Empty lines, or lines with only a category string are not allowed!");
+    }
+
+    int sentenceLength = tokens.length - 1;
+    String[] sentenceTokens = new String[sentenceLength];
+    System.arraycopy(tokens, 1, sentenceTokens, 0, sentenceLength);
+
+    return new SentimentSample(tokens[0], sentenceTokens);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleTypeFilter.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleTypeFilter.java b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleTypeFilter.java
new file mode 100755
index 0000000..68e7ecc
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentiment/SentimentSampleTypeFilter.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Stream wrapper that holds a set of sentiment types next to a stream of
+ * {@link SentimentSample} objects.
+ * <p>
+ * NOTE(review): the type set is stored but {@link #read()} does not consult
+ * it, so samples currently pass through unchanged - confirm whether filtering
+ * by type is still to be implemented.
+ */
+public class SentimentSampleTypeFilter
+    extends FilterObjectStream<SentimentSample, SentimentSample> {
+
+  private final Set<String> types;
+
+  /**
+   * Creates the filter from an array of types.
+   *
+   * @param types
+   *          the types to keep; copied into an unmodifiable set
+   * @param samples
+   *          the samples to wrap
+   */
+  public SentimentSampleTypeFilter(String[] types,
+      ObjectStream<SentimentSample> samples) {
+    this(new HashSet<>(Arrays.asList(types)), samples);
+  }
+
+  /**
+   * Creates the filter from a set of types.
+   *
+   * @param types
+   *          the types to keep; copied into an unmodifiable set
+   * @param samples
+   *          the samples to wrap
+   */
+  public SentimentSampleTypeFilter(Set<String> types,
+      ObjectStream<SentimentSample> samples) {
+    super(samples);
+    Set<String> copy = new HashSet<>(types);
+    this.types = Collections.unmodifiableSet(copy);
+  }
+
+  /**
+   * Returns the next sample of the wrapped stream as is.
+   *
+   * @return whatever the wrapped stream returns from its own read()
+   */
+  @Override
+  public SentimentSample read() throws IOException {
+    return samples.read();
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/AbstractSentimentTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/AbstractSentimentTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/AbstractSentimentTest.java
new file mode 100644
index 0000000..1daca03
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/AbstractSentimentTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.File;
+import java.io.IOException;
+
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * Shared fixtures for the sentiment tests: training parameters, sample
+ * stream creation, and helpers to train, save and load a model.
+ */
+public class AbstractSentimentTest {
+
+  protected static final String TRAINING_DATASET = "opennlp/tools/sentiment/sample_train_categ";
+  protected static final String MODEL = "opennlp/tools/sentiment/sample_model";
+  protected static final String ENCODING = "ISO-8859-1";
+  protected static final String LANG = "en";
+
+  private static final SentimentFactory factory = new SentimentFactory();
+
+  /**
+   * Builds the training parameters shared by all fixtures
+   * (50 iterations, cutoff 1).
+   */
+  private static TrainingParameters createTrainingParameters() {
+    TrainingParameters params = new TrainingParameters();
+    params.put(TrainingParameters.ITERATIONS_PARAM, 50);
+    params.put(TrainingParameters.CUTOFF_PARAM, 1);
+    return params;
+  }
+
+  /**
+   * Creates a SentimentME from language, parameters and factory only; no
+   * model is passed in.
+   *
+   * @return the new sentiment instance
+   */
+  protected Sentiment createEmptySentiment() {
+    return new SentimentME(LANG, createTrainingParameters(), factory);
+  }
+
+  /**
+   * Creates a cross validator using the shared parameters and factory; null
+   * is passed as the last constructor argument.
+   *
+   * @return the new cross validator
+   */
+  protected SentimentCrossValidator createCrossValidation() {
+    return new SentimentCrossValidator(LANG, createTrainingParameters(), factory, null);
+  }
+
+  /**
+   * Tokenizes the text with the tokenizer supplied by the sentiment factory.
+   *
+   * @param txt
+   *          the text to tokenize
+   * @return the tokens
+   */
+  protected String[] tokenize(String txt) {
+    return factory.getTokenizer().tokenize(txt);
+  }
+
+  /**
+   * Opens the training data set as a stream of sentiment samples. The caller
+   * owns the returned stream.
+   *
+   * @return the sample stream over {@link #TRAINING_DATASET}
+   */
+  protected SentimentSampleStream createSampleStream() throws IOException {
+    MockInputStreamFactory mockStream = new MockInputStreamFactory(
+        new File(TRAINING_DATASET));
+    return new SentimentSampleStream(
+        new PlainTextByLineStream(mockStream, ENCODING));
+  }
+
+  /**
+   * Loads a model from the given file and wraps it in a SentimentME.
+   *
+   * @param modelFile
+   *          the serialized model
+   * @return a sentiment instance backed by the loaded model
+   */
+  protected Sentiment loadSentiment(File modelFile)
+      throws InvalidFormatException, IOException {
+    SentimentModel model = new SentimentModel(modelFile);
+    return new SentimentME(model);
+  }
+
+  /**
+   * Trains a model on the training data set and writes it to a temporary
+   * file.
+   *
+   * @return the temporary model file; it is registered for deletion on JVM
+   *         exit so repeated test runs do not pile up files
+   */
+  protected File saveTempModel() throws IOException {
+    Sentiment sentiment = createEmptySentiment();
+
+    SentimentModel model;
+    // Release the training data once training is done
+    try (SentimentSampleStream sampleStream = createSampleStream()) {
+      model = sentiment.train(sampleStream);
+    }
+
+    File temp = File.createTempFile("sample_model", ".tmp");
+    temp.deleteOnExit();
+    CmdLineUtil.writeModel("sentiment analysis", temp, model);
+    return temp;
+  }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentCrossValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentCrossValidatorTest.java
new file mode 100755
index 0000000..c519c17
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentCrossValidatorTest.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Smoke test for the sentiment cross validator.
+ */
+public class SentimentCrossValidatorTest extends AbstractSentimentTest {
+
+  /**
+   * Runs the cross validator from the shared fixture over the training data,
+   * passing 2 as the second argument of evaluate (presumably the number of
+   * folds - confirm), and checks that an F-measure object is available
+   * afterwards.
+   */
+  @Test
+  public void testWithNullResources() throws Exception {
+    SentimentCrossValidator validator = createCrossValidation();
+    SentimentSampleStream trainingData = createSampleStream();
+
+    validator.evaluate(trainingData, 2);
+
+    Assert.assertNotNull(validator.getFMeasure());
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEvaluatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEvaluatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEvaluatorTest.java
new file mode 100644
index 0000000..1e6280b
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEvaluatorTest.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import opennlp.tools.cmdline.sentiment.SentimentEvaluationErrorListener;
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Tests the sentiment evaluator together with the error listener that writes
+ * to an output stream.
+ */
+public class SentimentEvaluatorTest extends AbstractSentimentTest {
+
+  private static final String LIKE_SENTENCE = "benefits from serendipity also reminds us of our "
+      + "own responsibility to question what is told as the truth";
+
+  private static final String ANGRY_SENTENCE = "Stupid , infantile , redundant , sloppy , "
+      + "over-the-top , and amateurish . Yep";
+
+  private static final String NEUTRAL = "stripped almost entirely of such tools as nudity , "
+      + "profanity and violence";
+
+  private Sentiment sentiment;
+
+  /**
+   * Trains a fresh sentiment instance on the shared training data before
+   * each test.
+   */
+  @Before
+  public void setup() throws IOException {
+    sentiment = createEmptySentiment();
+    SentimentSampleStream sampleStream = createSampleStream();
+    sentiment.train(sampleStream);
+  }
+
+  /**
+   * Evaluates a "like" sample and then an "angry" sample, each labelled with
+   * the sentiment the test expects the model to predict. After each
+   * evaluation the F-measure must be 1.0 and the listener must not have
+   * written anything.
+   */
+  @Test
+  public void testPositive() throws InvalidFormatException, IOException {
+    String[] tokens = tokenize(LIKE_SENTENCE);
+    SentimentSample sample = new SentimentSample("like", tokens, false);
+    OutputStream stream = new ByteArrayOutputStream();
+    SentimentEvaluationMonitor listener = new SentimentEvaluationErrorListener(
+        stream);
+    SentimentEvaluator eval = new SentimentEvaluator(sentiment, listener);
+
+    eval.evaluateSample(sample);
+
+    Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
+
+    Assert.assertEquals(0, stream.toString().length());
+
+    tokens = tokenize(ANGRY_SENTENCE);
+    sample = new SentimentSample("angry", tokens, false);
+
+    // The second sample has to be evaluated as well; without this call the
+    // assertions below only re-check the state left by the first sample.
+    eval.evaluateSample(sample);
+
+    Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
+    Assert.assertEquals(0, stream.toString().length());
+  }
+
+  /**
+   * Evaluates a sample labelled "like" whose text the test expects to be
+   * classified differently. The F-measure is expected to be -1.0 (presumably
+   * the value reported when nothing matched - confirm) and the listener must
+   * have written output.
+   */
+  @Test
+  public void testMissclassified() throws InvalidFormatException, IOException {
+    OutputStream stream = new ByteArrayOutputStream();
+    SentimentEvaluationMonitor listener = new SentimentEvaluationErrorListener(
+        stream);
+
+    String[] tokens = tokenize(NEUTRAL);
+    SentimentSample sample = new SentimentSample("like", tokens, false);
+    SentimentEvaluator eval = new SentimentEvaluator(sentiment, listener);
+
+    eval.evaluateSample(sample);
+
+    Assert.assertEquals(-1.0, eval.getFMeasure().getFMeasure(), 0.0);
+    Assert.assertNotEquals(0, stream.toString().length());
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEventStreamTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEventStreamTest.java
new file mode 100755
index 0000000..2fb9ee2
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentEventStreamTest.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.ObjectStreamUtils;
+
+/**
+ * Tests the conversion of sentiment samples into training events.
+ */
+public class SentimentEventStreamTest {
+
+  private static final String[] SENTENCE = { "benefits", "from", "serendipity",
+      "but", "also", "reminds", "us", "of", "our", "own", "responsibility",
+      "to", "question", "what", "is", "told", "as", "the", "truth" };
+  private static final String SENTIMENT = "like";
+  private static final SentimentContextGenerator CG = new SentimentContextGenerator();
+
+  /**
+   * Wraps a single sample in an event stream and checks that the first event
+   * carries the sample's sentiment as its outcome.
+   */
+  @Test
+  public void testSentEventStream() throws Exception {
+    SentimentSample sample = new SentimentSample(SENTIMENT, SENTENCE, false);
+
+    // try-with-resources closes the stream even when the assertion fails
+    try (ObjectStream<Event> eventStream = new SentimentEventStream(
+        ObjectStreamUtils.createObjectStream(sample), CG)) {
+      Assert.assertEquals(SENTIMENT, eventStream.read().getOutcome());
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentMETest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentMETest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentMETest.java
new file mode 100644
index 0000000..eb789ce
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentMETest.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.io.File;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests training, prediction and model round-tripping of the sentiment
+ * classifier.
+ */
+public class SentimentMETest extends AbstractSentimentTest {
+
+  /**
+   * Checks the predictions for one sentence per sentiment category.
+   *
+   * @param sentiment
+   *          a trained or loaded sentiment instance
+   */
+  private void assertExpectedPredictions(Sentiment sentiment) {
+    // "Angry"
+    String[] tokens = tokenize(
+        "Stupid , infantile , redundant , sloppy , over-the-top , and amateurish . Yep");
+    Assert.assertEquals("angry", sentiment.predict(tokens));
+
+    // "Sad"
+    String[] tokens2 = tokenize(
+        "Strong filmmaking requires a clear sense of purpose , and in that oh-so-important category , "
+            + "The Four Feathers comes up short");
+    Assert.assertEquals("sad", sentiment.predict(tokens2));
+
+    // "Neutral"
+    String[] tokens3 = tokenize(
+        "to make its points about acceptance and growth");
+    Assert.assertEquals("neutral", sentiment.predict(tokens3));
+
+    // "Like"
+    String[] tokens4 = tokenize("best performance");
+    Assert.assertEquals("like", sentiment.predict(tokens4));
+
+    // "Love"
+    String[] tokens5 = tokenize("best short story writing");
+    Assert.assertEquals("love", sentiment.predict(tokens5));
+  }
+
+  /**
+   * Trains in memory and checks the predictions of the trained instance.
+   */
+  @Test
+  public void testSentimentModel() throws Exception {
+    Sentiment sentiment = createEmptySentiment();
+    SentimentSampleStream sampleStream = createSampleStream();
+
+    sentiment.train(sampleStream);
+
+    assertExpectedPredictions(sentiment);
+  }
+
+  /**
+   * Predicting with an instance that was never trained is expected to throw
+   * a NullPointerException.
+   */
+  @Test(expected = NullPointerException.class)
+  public void testEmptyModel() throws Exception {
+    Sentiment sentiment = createEmptySentiment();
+    String[] tokens = tokenize("best performance");
+    sentiment.predict(tokens);
+  }
+
+  /**
+   * Predicting for an empty token array is expected to throw an
+   * IllegalArgumentException.
+   */
+  @Test(expected = IllegalArgumentException.class)
+  public void testEmptySentiment() throws Exception {
+    Sentiment sentiment = createEmptySentiment();
+    SentimentSampleStream sampleStream = createSampleStream();
+
+    sentiment.train(sampleStream);
+
+    String[] tokens = new String[] {};
+    sentiment.predict(tokens);
+  }
+
+  /**
+   * Saves a trained model to a temporary file, loads it back and checks the
+   * predictions of the loaded instance.
+   */
+  @Test
+  public void testWorkingModel() throws Exception {
+    File tempModel = saveTempModel();
+    try {
+      Sentiment sentiment = loadSentiment(tempModel);
+      assertExpectedPredictions(sentiment);
+    } finally {
+      // Best-effort clean-up so test runs do not leave model files behind
+      tempModel.delete();
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentSampleTest.java
new file mode 100755
index 0000000..6306f0d
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentiment/SentimentSampleTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.sentiment;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests the accessors of a sentiment sample.
+ */
+public class SentimentSampleTest {
+
+  /**
+   * Builds a sample from a label and a token array and checks the label and
+   * the clear-adaptive-data flag.
+   * <p>
+   * NOTE(review): the final array assertion compares the input tokens with a
+   * list view of that same array and never reads the sentence back from the
+   * sample - confirm whether the sample's own tokens were meant to be checked.
+   */
+  @Test
+  public void testSentSample() throws Exception {
+    String label = "like";
+    String[] words = { "benefits", "from", "serendipity", "but", "also",
+        "reminds", "us", "of", "our", "own", "responsibility", "to", "question",
+        "what", "is", "told", "as", "the", "truth" };
+    List<String> wordList = Collections.unmodifiableList(Arrays.asList(words));
+
+    SentimentSample sample = new SentimentSample(label, words, false);
+
+    Assert.assertEquals("like", sample.getSentiment());
+    Assert.assertEquals(false, sample.isClearAdaptiveDataSet());
+
+    String[] wordsFromList = wordList.toArray(new String[0]);
+    Assert.assertArrayEquals(words, wordsFromList);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-netflix-sentiment.bin
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-netflix-sentiment.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-netflix-sentiment.bin
new file mode 100644
index 0000000..0a34a33
Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-netflix-sentiment.bin differ
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-stanford-sentiment.bin
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-stanford-sentiment.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-stanford-sentiment.bin
new file mode 100644
index 0000000..c9d79e8
Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/en-stanford-sentiment.bin differ
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-lg-model-raw.bin
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-lg-model-raw.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-lg-model-raw.bin
new file mode 100644
index 0000000..0e3511e
Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-lg-model-raw.bin differ
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-bin.bin
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-bin.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-bin.bin
new file mode 100644
index 0000000..253c9fd
Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-bin.bin differ
http://git-wip-us.apache.org/repos/asf/opennlp/blob/56321aab/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-categ.bin
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-categ.bin b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-categ.bin
new file mode 100644
index 0000000..3bb6cc6
Binary files /dev/null and b/opennlp-tools/src/test/resources/opennlp/tools/sentiment/ht-sentiment-categ.bin differ