You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by dw...@apache.org on 2022/06/05 19:35:30 UTC

[solr] branch main updated: Upgrade Carrot2 to 4.4.2 and HPPC to 0.9.1 (#884)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 48f1f8cbc9a Upgrade Carrot2 to 4.4.2 and HPPC to 0.9.1 (#884)
48f1f8cbc9a is described below

commit 48f1f8cbc9a494652244e143911af8443996fb59
Author: Dawid Weiss <da...@carrotsearch.com>
AuthorDate: Sun Jun 5 21:35:25 2022 +0200

    Upgrade Carrot2 to 4.4.2 and HPPC to 0.9.1 (#884)
---
 solr/CHANGES.txt                                             |  2 ++
 solr/licenses/carrot2-core-4.0.4.jar.sha1                    |  1 -
 solr/licenses/carrot2-core-4.4.2.jar.sha1                    |  1 +
 solr/licenses/hppc-0.8.2.jar.sha1                            |  1 -
 solr/licenses/hppc-0.9.1.jar.sha1                            |  1 +
 solr/modules/clustering/build.gradle                         |  1 +
 .../testCustomLanguageResources/english.label-filters.json   |  9 +++++++++
 .../conf/testCustomLanguageResources/english.stoplabels.utf8 |  1 -
 .../conf/testCustomLanguageResources/english.stopwords.utf8  |  2 --
 .../testCustomLanguageResources/english.word-filters.json    | 10 ++++++++++
 ...ClusteringComponentDistributedTest-testLingoAlgorithm.txt | 11 +++++------
 .../ClusteringComponentDistributedTest-testStcAlgorithm.txt  |  1 +
 .../ClusteringComponentTest-testLingoAlgorithm.txt           | 11 +++++------
 .../clustering/ClusteringComponentTest-testStcAlgorithm.txt  |  3 ++-
 .../clustering/ClusteringComponentDistributedTest.java       |  2 ++
 .../solr/handler/clustering/ClusteringComponentTest.java     |  2 ++
 .../solr/handler/clustering/ResourceCheckAlgorithm.java      | 12 +++++++-----
 versions.lock                                                |  4 ++--
 versions.props                                               |  4 ++--
 19 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7227324bc55..dd00dfbdc51 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -48,6 +48,8 @@ Improvements
 
 * SOLR-16181: Initialize the LogWatcher earlier in CoreContainer#load() (janhoy)
 
+* SOLR-16225: Upgrade dependencies (Carrot2, HPPC) (Dawid Weiss)
+
 Optimizations
 ---------------------
 * SOLR-16120: Optimise hl.fl expansion. (Christine Poerschke, David Smiley, Mike Drob)
diff --git a/solr/licenses/carrot2-core-4.0.4.jar.sha1 b/solr/licenses/carrot2-core-4.0.4.jar.sha1
deleted file mode 100644
index 0d26e39096c..00000000000
--- a/solr/licenses/carrot2-core-4.0.4.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-fb60ab80cfd69abe6cad1939f24bd5210501b177
diff --git a/solr/licenses/carrot2-core-4.4.2.jar.sha1 b/solr/licenses/carrot2-core-4.4.2.jar.sha1
new file mode 100644
index 00000000000..9bdf9a6c06e
--- /dev/null
+++ b/solr/licenses/carrot2-core-4.4.2.jar.sha1
@@ -0,0 +1 @@
+20f170227ddbbf1e012bf504a3ab21f5696f19d2
diff --git a/solr/licenses/hppc-0.8.2.jar.sha1 b/solr/licenses/hppc-0.8.2.jar.sha1
deleted file mode 100644
index a73358b9c66..00000000000
--- a/solr/licenses/hppc-0.8.2.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-ccb3ef933ead6b5d766fa571582ddb9b447e48c4
diff --git a/solr/licenses/hppc-0.9.1.jar.sha1 b/solr/licenses/hppc-0.9.1.jar.sha1
new file mode 100644
index 00000000000..5cedaaa6433
--- /dev/null
+++ b/solr/licenses/hppc-0.9.1.jar.sha1
@@ -0,0 +1 @@
+4bf4c51e06aec600894d841c4c004566b20dd357
diff --git a/solr/modules/clustering/build.gradle b/solr/modules/clustering/build.gradle
index fa0811dda7d..652a7d60528 100644
--- a/solr/modules/clustering/build.gradle
+++ b/solr/modules/clustering/build.gradle
@@ -29,6 +29,7 @@ dependencies {
   implementation 'org.slf4j:slf4j-api'
 
   testImplementation project(':solr:test-framework')
+  testImplementation 'org.apache.lucene:lucene-test-framework'
   testImplementation 'com.carrotsearch.randomizedtesting:randomizedtesting-runner'
   testImplementation 'junit:junit'
   testImplementation 'org.hamcrest:hamcrest'
diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json
new file mode 100644
index 00000000000..7c805464b2b
--- /dev/null
+++ b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.label-filters.json
@@ -0,0 +1,9 @@
+{
+  "glob": [
+  ],
+  "regexp": [
+    "ba.+"
+  ],
+  "exact": [
+  ]
+}
diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8 b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8
deleted file mode 100644
index 1471282e6f8..00000000000
--- a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stoplabels.utf8
+++ /dev/null
@@ -1 +0,0 @@
-ba.+
\ No newline at end of file
diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8 b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8
deleted file mode 100644
index 3bd1f0e2974..00000000000
--- a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.stopwords.utf8
+++ /dev/null
@@ -1,2 +0,0 @@
-foo
-bar
diff --git a/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json
new file mode 100644
index 00000000000..7746028055e
--- /dev/null
+++ b/solr/modules/clustering/src/test-files/clustering/solr/collection1/conf/testCustomLanguageResources/english.word-filters.json
@@ -0,0 +1,10 @@
+{
+  "exact": [
+    "foo",
+    "bar"
+  ],
+  "glob": [
+  ],
+  "regexp": [
+  ]
+}
diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt
index ee8b3842d0d..3f35f30ecf5 100644
--- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt
+++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testLingoAlgorithm.txt
@@ -1,17 +1,16 @@
 - Knowledge Discovery [6]
-- Patterns [6]
 - Data Mining Applications [5]
 - Statistical Analysis [4]
+- Analysis Techniques [3]
 - Computer [3]
-- Creating [3]
 - Data Mining Solutions [3]
 - Known as Data Mining [3]
 - Text Mining [3]
-- Databases KDD [2]
-- Extraction of Hidden Predictive [2]
+- Courses [2]
+- Extraction of Hidden Predictive Information [2]
 - Information from Large [2]
+- Machine Learning [2]
+- Neural Networks [2]
 - Open [2]
 - Powers [2]
 - Searching [2]
-- Tools [2]
-- Other topics [1]
\ No newline at end of file
diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt
index 1ee2114eeb2..aef40034692 100644
--- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt
+++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest-testStcAlgorithm.txt
@@ -7,4 +7,5 @@
 - Businesses [4]
 - Predictive [4]
 - Process [4]
+- Hidden Predictive Information [2]
 - Other topics [2]
diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt
index 107b60f82f2..3f35f30ecf5 100644
--- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt
+++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testLingoAlgorithm.txt
@@ -1,17 +1,16 @@
 - Knowledge Discovery [6]
-- Patterns [6]
 - Data Mining Applications [5]
 - Statistical Analysis [4]
+- Analysis Techniques [3]
 - Computer [3]
-- Creating [3]
 - Data Mining Solutions [3]
 - Known as Data Mining [3]
 - Text Mining [3]
-- Databases KDD [2]
-- Extraction of Hidden Predictive [2]
+- Courses [2]
+- Extraction of Hidden Predictive Information [2]
 - Information from Large [2]
+- Machine Learning [2]
+- Neural Networks [2]
 - Open [2]
 - Powers [2]
 - Searching [2]
-- Tools [2]
-- Other topics [1]
diff --git a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt
index 1ee2114eeb2..a6d985098e2 100644
--- a/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt
+++ b/solr/modules/clustering/src/test-files/org/apache/solr/handler/clustering/ClusteringComponentTest-testStcAlgorithm.txt
@@ -4,7 +4,8 @@
 - Analysis [5]
 - Applications [5]
 - Software [5]
-- Businesses [4]
+- Business [4]
 - Predictive [4]
 - Process [4]
+- Hidden Predictive Information [2]
 - Other topics [2]
diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java
index 1a9398a6ba5..743bbfa2b86 100644
--- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java
+++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentDistributedTest.java
@@ -20,6 +20,7 @@ import java.io.IOException;
 import java.util.List;
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.solr.BaseDistributedSearchTestCase;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.response.Cluster;
@@ -72,6 +73,7 @@ public class ClusteringComponentDistributedTest extends BaseDistributedSearchTes
             }));
   }
 
+  @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/carrot2/carrot2/issues/149")
   @Test
   @ShardsFixed(num = 2)
   public void testStcAlgorithm() throws Exception {
diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java
index 0c6c968444b..8506f7f1726 100644
--- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java
+++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java
@@ -31,6 +31,7 @@ import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import org.apache.commons.io.FileUtils;
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.response.ClusteringResponse;
 import org.apache.solr.common.SolrDocument;
@@ -86,6 +87,7 @@ public class ClusteringComponentTest extends SolrTestCaseJ4 {
     compareToExpected(clusters("lingo", QUERY_TESTSET_SAMPLE_DOCUMENTS));
   }
 
+  @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/carrot2/carrot2/issues/149")
   @Test
   public void testStcAlgorithm() throws Exception {
     compareToExpected(clusters("stc", QUERY_TESTSET_SAMPLE_DOCUMENTS));
diff --git a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java
index a7bc78c07cb..16c275951c7 100644
--- a/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java
+++ b/solr/modules/clustering/src/test/org/apache/solr/handler/clustering/ResourceCheckAlgorithm.java
@@ -28,8 +28,9 @@ import org.carrot2.attrs.AttrString;
 import org.carrot2.clustering.Cluster;
 import org.carrot2.clustering.ClusteringAlgorithm;
 import org.carrot2.clustering.Document;
+import org.carrot2.language.LabelFilter;
 import org.carrot2.language.LanguageComponents;
-import org.carrot2.language.LexicalData;
+import org.carrot2.language.StopwordFilter;
 
 /**
  * Creates synthetic clusters with diagnostics of {@link LanguageComponents} passed to the
@@ -42,7 +43,7 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith
 
   @Override
   public Set<Class<?>> requiredLanguageComponents() {
-    return Set.of(LexicalData.class);
+    return Set.of(StopwordFilter.class, LabelFilter.class);
   }
 
   @Override
@@ -57,7 +58,8 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith
     cluster = new Cluster<>();
     clusters.add(cluster);
 
-    LexicalData lexicalData = languageComponents.get(LexicalData.class);
+    StopwordFilter stopwordFilter = languageComponents.get(StopwordFilter.class);
+    LabelFilter labelFilter = languageComponents.get(LabelFilter.class);
     cluster.addLabel(
         Arrays.stream(text.get().trim().split("[\\s]+"))
             .map(
@@ -66,8 +68,8 @@ class ResourceCheckAlgorithm extends AttrComposite implements ClusteringAlgorith
                         Locale.ROOT,
                         "%s[%s, %s]",
                         term,
-                        lexicalData.ignoreWord(term) ? "ignoredWord" : "-",
-                        lexicalData.ignoreLabel(term) ? "ignoredLabel" : "-"))
+                        stopwordFilter.test(term) ? "-" : "ignoredWord",
+                        labelFilter.test(term) ? "-" : "ignoredLabel"))
             .collect(Collectors.joining(" ")));
 
     return clusters;
diff --git a/versions.lock b/versions.lock
index ceeb6b1f9a9..ec3866498d7 100644
--- a/versions.lock
+++ b/versions.lock
@@ -1,6 +1,6 @@
 # Run ./gradlew --write-locks to regenerate this file
 com.beust:jcommander:1.82 (2 constraints: 2b123714)
-com.carrotsearch:hppc:0.8.2 (2 constraints: ad0fc5a6)
+com.carrotsearch:hppc:0.9.1 (2 constraints: ac0fc8a6)
 com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.7.9 (2 constraints: da1558e3)
 com.cybozu.labs:langdetect:1.1-20120112 (1 constraints: 5c066d5e)
 com.epam:parso:2.0.14 (1 constraints: 8e0c750e)
@@ -210,7 +210,7 @@ org.bouncycastle:bcpkix-jdk15on:1.70 (2 constraints: ce1b11b3)
 org.bouncycastle:bcprov-jdk15on:1.70 (4 constraints: 1f34ee12)
 org.bouncycastle:bcutil-jdk15on:1.70 (2 constraints: 961ad454)
 org.brotli:dec:0.1.2 (1 constraints: 5a0ce101)
-org.carrot2:carrot2-core:4.0.4 (1 constraints: 0a050336)
+org.carrot2:carrot2-core:4.4.2 (1 constraints: 0c050d36)
 org.carrot2:morfologik-fsa:2.1.8 (1 constraints: da0d9b36)
 org.carrot2:morfologik-polish:2.1.8 (1 constraints: d212531e)
 org.carrot2:morfologik-stemming:2.1.8 (2 constraints: d61f8b00)
diff --git a/versions.props b/versions.props
index bdaaf42b5dd..ecf354b9477 100644
--- a/versions.props
+++ b/versions.props
@@ -1,6 +1,6 @@
 com.adobe.testing:s3mock-junit4=2.1.34
 com.carrotsearch.randomizedtesting:*=2.7.9
-com.carrotsearch:hppc=0.8.2
+com.carrotsearch:hppc=0.9.1
 com.cybozu.labs:langdetect=1.1-20120112
 com.fasterxml.jackson:jackson-bom=2.13.3
 com.fasterxml.woodstox:woodstox-core=6.2.8
@@ -53,7 +53,7 @@ org.apache.opennlp:opennlp-tools=1.9.4
 org.apache.tika:*=1.28.1
 org.apache.zookeeper:*=3.7.0
 org.bitbucket.b_c:jose4j=0.7.9
-org.carrot2:carrot2-core=4.0.4
+org.carrot2:carrot2-core=4.4.2
 org.codehaus.woodstox:stax2-api=4.2.1
 org.eclipse.jetty*:*=9.4.44.v20210927
 org.hamcrest:*=2.2