You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/15 16:19:19 UTC

svn commit: r1610708 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/

Author: rmuir
Date: Tue Jul 15 14:19:19 2014
New Revision: 1610708

URL: http://svn.apache.org/r1610708
Log:
LUCENE-5824: hunspell FLAG LONG implemented incorrectly

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1610708&r1=1610707&r2=1610708&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Tue Jul 15 14:19:19 2014
@@ -76,6 +76,8 @@ Bug Fixes
   match affixes, words are only stripped to a zero-length string if FULLSTRIP option
   is specifed in the dictionary.  (Robert Muir)
 
+* LUCENE-5824: Fix hunspell 'long' flag handling. (Robert Muir)
+
 Test Framework
 
 * LUCENE-5786: Unflushed/ truncated events file (hung testing subprocess).

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java?rev=1610708&r1=1610707&r2=1610708&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java Tue Jul 15 14:19:19 2014
@@ -1072,8 +1072,6 @@ public class Dictionary {
   /**
    * Implementation of {@link FlagParsingStrategy} that assumes each flag is encoded as two ASCII characters whose codes
    * must be combined into a single character.
-   *
-   * TODO (rmuir) test
    */
   private static class DoubleASCIIFlagParsingStrategy extends FlagParsingStrategy {
 
@@ -1088,8 +1086,13 @@ public class Dictionary {
         throw new IllegalArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
       }
       for (int i = 0; i < rawFlags.length(); i+=2) {
-        char cookedFlag = (char) ((int) rawFlags.charAt(i) + (int) rawFlags.charAt(i + 1));
-        builder.append(cookedFlag);
+        char f1 = rawFlags.charAt(i);
+        char f2 = rawFlags.charAt(i+1);
+        if (f1 >= 256 || f2 >= 256) {
+          throw new IllegalArgumentException("Invalid flags (LONG flags must be double ASCII): " + rawFlags);
+        }
+        char combined = (char) (f1 << 8 | f2);
+        builder.append(combined);
       }
       
       char flags[] = new char[builder.length()];

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java?rev=1610708&r1=1610707&r2=1610708&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java Tue Jul 15 14:19:19 2014
@@ -29,5 +29,6 @@ public class TestFlagLong extends Stemme
     assertStemsTo("foo", "foo");
     assertStemsTo("foos", "foo");
     assertStemsTo("fooss");
+    assertStemsTo("foobogus");
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff?rev=1610708&r1=1610707&r2=1610708&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff Tue Jul 15 14:19:19 2014
@@ -2,3 +2,6 @@ SET UTF-8
 FLAG long
 SFX Y1 Y 1
 SFX Y1 0 s .
+
+SFX 1Y Y 1
+SFX 1Y 0 bogus .