You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2021/10/14 08:04:37 UTC

[lucene] branch main updated: LUCENE-10172 - minor java code improvements to Lucene Classification (#381)

This is an automated email from the ASF dual-hosted git repository.

tommaso pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new cfd9f9f9 LUCENE-10172 - minor java code improvements to Lucene Classification (#381)
cfd9f9f9 is described below

commit cfd9f9f98f7176fb71ff11d98cba61be366db758
Author: Tommaso Teofili <to...@apache.org>
AuthorDate: Thu Oct 14 10:04:33 2021 +0200

    LUCENE-10172 - minor java code improvements to Lucene Classification (#381)
    
    * LUCENE-10172 - minor code improvements
    
    * LUCENE-10172 - spotlessApply
---
 .../lucene/classification/BooleanPerceptronClassifier.java     |  8 ++++----
 .../apache/lucene/classification/KNearestFuzzyClassifier.java  | 10 ++++------
 .../lucene/classification/KNearestNeighborClassifier.java      | 10 ++++------
 .../lucene/classification/SimpleNaiveBayesClassifier.java      |  2 +-
 .../document/SimpleNaiveBayesDocumentClassifier.java           |  2 +-
 5 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java
index 1747069..7cd47cc 100644
--- a/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java
@@ -216,7 +216,7 @@ public class BooleanPerceptronClassifier implements Classifier<Boolean> {
 
   @Override
   public ClassificationResult<Boolean> assignClass(String text) throws IOException {
-    Long output = 0L;
+    long output = 0L;
     try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, text)) {
       CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
       tokenStream.reset();
@@ -230,17 +230,17 @@ public class BooleanPerceptronClassifier implements Classifier<Boolean> {
       tokenStream.end();
     }
 
-    double score = 1 - Math.exp(-1 * Math.abs(bias - output.doubleValue()) / bias);
+    double score = 1 - Math.exp(-1 * Math.abs(bias - (double) output) / bias);
     return new ClassificationResult<>(output >= bias, score);
   }
 
   @Override
-  public List<ClassificationResult<Boolean>> getClasses(String text) throws IOException {
+  public List<ClassificationResult<Boolean>> getClasses(String text) {
     return null;
   }
 
   @Override
-  public List<ClassificationResult<Boolean>> getClasses(String text, int max) throws IOException {
+  public List<ClassificationResult<Boolean>> getClasses(String text, int max) {
     return null;
   }
 }
diff --git a/lucene/classification/src/java/org/apache/lucene/classification/KNearestFuzzyClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/KNearestFuzzyClassifier.java
index 2fc3cdec..6e955b3 100644
--- a/lucene/classification/src/java/org/apache/lucene/classification/KNearestFuzzyClassifier.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/KNearestFuzzyClassifier.java
@@ -23,6 +23,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.classification.utils.NearestFuzzyQuery;
 import org.apache.lucene.index.IndexReader;
@@ -93,11 +94,8 @@ public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
     this.classFieldName = classFieldName;
     this.analyzer = analyzer;
     this.indexSearcher = new IndexSearcher(indexReader);
-    if (similarity != null) {
-      this.indexSearcher.setSimilarity(similarity);
-    } else {
-      this.indexSearcher.setSimilarity(new BM25Similarity());
-    }
+    this.indexSearcher.setSimilarity(
+        Objects.requireNonNullElseGet(similarity, BM25Similarity::new));
     this.query = query;
     this.k = k;
   }
@@ -166,7 +164,7 @@ public class KNearestFuzzyClassifier implements Classifier<BytesRef> {
       if (storableField != null) {
         BytesRef cl = new BytesRef(storableField.stringValue());
         // update count
-        classCounts.merge(cl, 1, (a, b) -> a + b);
+        classCounts.merge(cl, 1, Integer::sum);
         // update boost, the boost is based on the best score
         Double totalBoost = classBoosts.get(cl);
         double singleBoost = scoreDoc.score / maxScore;
diff --git a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
index 8c6ed10..d39e5b6 100644
--- a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
@@ -24,6 +24,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
@@ -102,11 +103,8 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
     this.mlt.setAnalyzer(analyzer);
     this.mlt.setFieldNames(textFieldNames);
     this.indexSearcher = new IndexSearcher(indexReader);
-    if (similarity != null) {
-      this.indexSearcher.setSimilarity(similarity);
-    } else {
-      this.indexSearcher.setSimilarity(new BM25Similarity());
-    }
+    this.indexSearcher.setSimilarity(
+        Objects.requireNonNullElseGet(similarity, BM25Similarity::new));
     if (minDocsFreq > 0) {
       mlt.setMinDocFreq(minDocsFreq);
     }
@@ -199,7 +197,7 @@ public class KNearestNeighborClassifier implements Classifier<BytesRef> {
         if (singleStorableField != null) {
           BytesRef cl = new BytesRef(singleStorableField.stringValue());
           // update count
-          classCounts.merge(cl, 1, (a, b) -> a + b);
+          classCounts.merge(cl, 1, Integer::sum);
           // update boost, the boost is based on the best score
           Double totalBoost = classBoosts.get(cl);
           double singleBoost = scoreDoc.score / maxScore;
diff --git a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
index 4083cf8..c1b5601 100644
--- a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
@@ -282,7 +282,7 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
   }
 
   private double calculateLogPrior(Term term, int docsWithClassSize) throws IOException {
-    return Math.log((double) docCount(term)) - Math.log(docsWithClassSize);
+    return Math.log(docCount(term)) - Math.log(docsWithClassSize);
   }
 
   private int docCount(Term term) throws IOException {
diff --git a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java
index eddf02d..8110c96 100644
--- a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java
@@ -269,7 +269,7 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi
   }
 
   private double calculateLogPrior(Term term, int docsWithClassSize) throws IOException {
-    return Math.log((double) docCount(term)) - Math.log(docsWithClassSize);
+    return Math.log(docCount(term)) - Math.log(docsWithClassSize);
   }
 
   private int docCount(Term term) throws IOException {