You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/01/20 09:44:45 UTC

[opennlp-sandbox] branch migrate-mahout-addon-to-opennlp-tools-2_1_0 created (now 81ac839)

This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a change to branch migrate-mahout-addon-to-opennlp-tools-2_1_0
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


      at 81ac839  updates sandbox component 'mahout-addon' to be compatible with latest opennlp-tools release

This branch includes the following new commits:

     new 81ac839  updates sandbox component 'mahout-addon' to be compatible with latest opennlp-tools release

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[opennlp-sandbox] 01/01: updates sandbox component 'mahout-addon' to be compatible with latest opennlp-tools release

Posted by ma...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch migrate-mahout-addon-to-opennlp-tools-2_1_0
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git

commit 81ac83965a8fa42d89aeb90ea5946fa396f5578e
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Fri Jan 20 10:44:37 2023 +0100

    updates sandbox component 'mahout-addon' to be compatible with latest opennlp-tools release
    
    - adjusts opennlp-tools to 2.1.0
    - adjusts parent project (org.apache.apache) to version 18
    - adjusts Java language level to 11
    - updates mahout-core to version 0.9 to mitigate several CVEs
    - removes unused imports
---
 mahout-addon/pom.xml                               | 32 +++++++++++++++-------
 .../mahout/AbstractOnlineLearnerTrainer.java       | 15 ++++------
 .../addons/mahout/LogisticRegressionTrainer.java   |  5 ----
 .../mahout/OnlineLogisticRegressionTrainer.java    |  1 -
 .../addons/mahout/VectorClassifierModel.java       |  3 ++
 5 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/mahout-addon/pom.xml b/mahout-addon/pom.xml
index d2c2d0a..81a92e3 100644
--- a/mahout-addon/pom.xml
+++ b/mahout-addon/pom.xml
@@ -21,15 +21,17 @@
 
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
-	
+
 	<parent>
-	    <groupId>org.apache.opennlp</groupId>
-	    <artifactId>opennlp</artifactId>
-	    <version>1.6.0-SNAPSHOT</version>
-	    <relativePath>../opennlp/pom.xml</relativePath>
-    </parent>
-    
+		<groupId>org.apache</groupId>
+		<artifactId>apache</artifactId>
+		<!-- TODO OPENNLP-1452 once this is resolved, move to 29 as well. -->
+		<version>18</version>
+		<relativePath />
+	</parent>
+
 	<artifactId>mahout-addon</artifactId>
+	<version>2.1.1-SNAPSHOT</version>
 	<packaging>jar</packaging>
 	<name>Apache OpenNLP Mahout Addon</name>
 
@@ -37,24 +39,34 @@
 		<dependency>
 			<groupId>org.apache.opennlp</groupId>
 			<artifactId>opennlp-tools</artifactId>
-			<version>1.6.0-SNAPSHOT</version>
+			<version>2.1.0</version>
 		</dependency>
-		
+
 		<dependency>
 			<groupId>org.apache.mahout</groupId>
 			<artifactId>mahout-core</artifactId>
-			<version>0.8</version>
+			<version>0.9</version>
 		</dependency>
 
 		<dependency>
 			<groupId>junit</groupId>
 			<artifactId>junit</artifactId>
+			<version>4.13.1</version>
 			<scope>test</scope>
 		</dependency>
 	</dependencies>
 
 	<build>
 		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<configuration>
+					<source>11</source>
+					<target>11</target>
+					<compilerArgument>-Xlint</compilerArgument>
+				</configuration>
+			</plugin>
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-dependency-plugin</artifactId>
diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java
index 9b8d85e..cfe5a73 100644
--- a/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java
+++ b/mahout-addon/src/main/java/opennlp/addons/mahout/AbstractOnlineLearnerTrainer.java
@@ -19,16 +19,12 @@
 
 package opennlp.addons.mahout;
 
-import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
 import opennlp.tools.ml.AbstractEventTrainer;
 import opennlp.tools.ml.model.DataIndexer;
-import opennlp.tools.ml.model.MaxentModel;
 
-import org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression;
-import org.apache.mahout.classifier.sgd.L1;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
 
@@ -39,6 +35,7 @@ abstract class AbstractOnlineLearnerTrainer extends AbstractEventTrainer {
   public AbstractOnlineLearnerTrainer() {
   }
 
+  @Override
   public void init(Map<String, String> trainParams,
 	      Map<String, String> reportMap) {
 	  String iterationsValue = trainParams.get("Iterations");
@@ -53,16 +50,16 @@ abstract class AbstractOnlineLearnerTrainer extends AbstractEventTrainer {
   
   protected void trainOnlineLearner(DataIndexer indexer, org.apache.mahout.classifier.OnlineLearner pa) {
     int cardinality = indexer.getPredLabels().length;
-    int outcomes[] = indexer.getOutcomeList();
+    int[] outcomes = indexer.getOutcomeList();
     
     for (int i = 0; i < indexer.getContexts().length; i++) {
 
       Vector vector = new RandomAccessSparseVector(cardinality);
       
-      int features[] = indexer.getContexts()[i];
-      
-      for (int fi = 0; fi < features.length; fi++) {
-        vector.set(features[fi], indexer.getNumTimesEventsSeen()[i]);
+      int[] features = indexer.getContexts()[i];
+
+      for (int feature : features) {
+        vector.set(feature, indexer.getNumTimesEventsSeen()[i]);
       } 
       
       pa.train(outcomes[i], vector);
diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java
index 85442bb..b2a29ae 100644
--- a/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java
+++ b/mahout-addon/src/main/java/opennlp/addons/mahout/LogisticRegressionTrainer.java
@@ -23,16 +23,11 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
-import opennlp.tools.ml.AbstractEventTrainer;
 import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.ml.model.MaxentModel;
 
 import org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression;
 import org.apache.mahout.classifier.sgd.L1;
-import org.apache.mahout.classifier.sgd.OnlineLogisticRegression;
-import org.apache.mahout.classifier.sgd.PassiveAggressive;
-import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.Vector;
 
 public class LogisticRegressionTrainer extends AbstractOnlineLearnerTrainer {
   
diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java b/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java
index 5b09870..ee23b9c 100644
--- a/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java
+++ b/mahout-addon/src/main/java/opennlp/addons/mahout/OnlineLogisticRegressionTrainer.java
@@ -25,7 +25,6 @@ import java.util.Map;
 import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.ml.model.MaxentModel;
 
-import org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression;
 import org.apache.mahout.classifier.sgd.L1;
 import org.apache.mahout.classifier.sgd.OnlineLogisticRegression;
 
diff --git a/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java b/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java
index 046ed2a..fabe3e2 100644
--- a/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java
+++ b/mahout-addon/src/main/java/opennlp/addons/mahout/VectorClassifierModel.java
@@ -42,6 +42,7 @@ public class VectorClassifierModel implements MaxentModel {
     this.predMap = predMap;
   }
 
+  @Override
   public double[] eval(String[] features) {
     Vector vector = new RandomAccessSparseVector(predMap.size());
     
@@ -64,10 +65,12 @@ public class VectorClassifierModel implements MaxentModel {
     return outcomes;
   }
 
+  @Override
   public double[] eval(String[] context, double[] probs) {
     return eval(context);
   }
 
+  @Override
   public double[] eval(String[] context, float[] values) {
     return eval(context);
   }