You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/02/15 19:21:08 UTC

[lucene-solr] branch master updated: LUCENE-9772: Hunspell: CHECKCOMPOUNDCASE shouldn't prohibit dash-separated uppercase compounds (#2370)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 1ff11dd  LUCENE-9772: Hunspell: CHECKCOMPOUNDCASE shouldn't prohibit dash-separated uppercase compounds (#2370)
1ff11dd is described below

commit 1ff11dd02c59aa51786814ad1822655cd66e3e3c
Author: Peter Gromov <pe...@jetbrains.com>
AuthorDate: Mon Feb 15 20:20:58 2021 +0100

    LUCENE-9772: Hunspell: CHECKCOMPOUNDCASE shouldn't prohibit dash-separated uppercase compounds (#2370)
---
 .../common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java | 4 +++-
 .../test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff    | 1 +
 .../test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic    | 3 ++-
 .../test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good   | 1 +
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
index 3286f86..9988421 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
@@ -336,7 +336,9 @@ public class Hunspell {
 
   private boolean mayBreakIntoCompounds(char[] chars, int offset, int length, int breakPos) {
     if (dictionary.checkCompoundCase) {
-      if (Character.isUpperCase(chars[breakPos - 1]) || Character.isUpperCase(chars[breakPos])) {
+      char a = chars[breakPos - 1];
+      char b = chars[breakPos];
+      if ((Character.isUpperCase(a) || Character.isUpperCase(b)) && a != '-' && b != '-') {
         return false;
       }
     }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff
index 7ac46ee..006739d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff
@@ -1,3 +1,4 @@
 # forbid upper case letters at word bounds in compounding
 CHECKCOMPOUNDCASE
+WORDCHARS -
 COMPOUNDFLAG A
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic
index 80f65d3..6bdbbba 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic
@@ -1,5 +1,6 @@
-4
+5
 foo/A
 Bar/A
 BAZ/A
 -/A
+prefix-/A
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good
index 9cbd790..a2dfe5b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good
@@ -3,3 +3,4 @@ foo-Bar
 foo-BAZ
 BAZ-foo
 BAZ-Bar
+prefix-BAZ
\ No newline at end of file