You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/01/23 10:31:04 UTC
[34/41] lucene-solr:jira/solr-11702: LUCENE-8124: Fixed
HyphenationCompoundWordTokenFilter to correctly handle hyphenation patterns
with indicator >= 7.
LUCENE-8124: Fixed HyphenationCompoundWordTokenFilter to correctly handle hyphenation patterns with indicator >= 7.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/f5e22670
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/f5e22670
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/f5e22670
Branch: refs/heads/jira/solr-11702
Commit: f5e2267097df5bee3942c719facbca137a56f3f8
Parents: fc6f3a4
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 22 08:46:01 2018 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 22 08:46:01 2018 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +++
.../compound/hyphenation/HyphenationTree.java | 4 ++--
.../compound/TestCompoundWordTokenFilter.java | 15 +++++++++++++++
3 files changed, 20 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f5e22670/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6b90215..e95d066 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -146,6 +146,9 @@ Bug Fixes
* LUCENE-8130: Fix NullPointerException from TermStates.toString() (Mike McCandless)
+* LUCENE-8124: Fixed HyphenationCompoundWordTokenFilter to handle correctly
+ hyphenation patterns with indicator >= 7. (Holger Bruch via Adrien Grand)
+
Other
* LUCENE-8111: IndexOrDocValuesQuery Javadoc references outdated method name.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f5e22670/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java
index 0f7dd2b..3c72b4f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java
@@ -89,7 +89,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer {
StringBuilder buf = new StringBuilder();
byte v = vspace.get(k++);
while (v != 0) {
- char c = (char) ((v >>> 4) - 1 + '0');
+ char c = (char) (((v & 0xf0 )>>> 4) - 1 + '0');
buf.append(c);
c = (char) (v & 0x0f);
if (c == 0) {
@@ -151,7 +151,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer {
StringBuilder buf = new StringBuilder();
byte v = vspace.get(k++);
while (v != 0) {
- char c = (char) ((v >>> 4) - 1);
+ char c = (char) (((v & 0xf0 )>>> 4) - 1);
buf.append(c);
c = (char) (v & 0x0f);
if (c == 0) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f5e22670/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
index ed3abe4..67a1bb4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
@@ -262,6 +262,21 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
}
}
+
+ public void testLucene8124() throws Exception {
+ InputSource is = new InputSource(getClass().getResource("hyphenation-LUCENE-8124.xml").toExternalForm());
+ HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
+ .getHyphenationTree(is);
+
+ HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
+ whitespaceMockTokenizer(
+ "Rindfleisch"),
+ hyphenator);
+
+ // TODO: Rindfleisch being returned twice is a separate issue in HyphenationCompoundWordTokenFilter
+ assertTokenStreamContents(tf, new String[] { "Rindfleisch", "Rind", "Rindfleisch", "fleisch"});
+ }
+
public static interface MockRetainAttribute extends Attribute {
void setRetain(boolean attr);