You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2012/04/14 09:32:43 UTC

svn commit: r1326065 - in /lucene/dev/trunk/modules/analysis: CHANGES.txt common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java

Author: dweiss
Date: Sat Apr 14 07:32:42 2012
New Revision: 1326065

URL: http://svn.apache.org/viewvc?rev=1326065&view=rev
Log:
LUCENE-3971: MappingCharFilter could return invalid final token position.
(Dawid Weiss, Robert Muir)

Modified:
    lucene/dev/trunk/modules/analysis/CHANGES.txt
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java

Modified: lucene/dev/trunk/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/CHANGES.txt?rev=1326065&r1=1326064&r2=1326065&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/analysis/CHANGES.txt Sat Apr 14 07:32:42 2012
@@ -39,6 +39,9 @@ API Changes
 
 Bug fixes
 
+ * LUCENE-3971: MappingCharFilter could return invalid final token position.
+   (Dawid Weiss, Robert Muir)
+
  * LUCENE-3820: PatternReplaceCharFilter could return invalid token positions. 
    (Dawid Weiss)
 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java?rev=1326065&r1=1326064&r2=1326065&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java Sat Apr 14 07:32:42 2012
@@ -51,6 +51,11 @@ public class MappingCharFilter extends B
   }
 
   @Override
+  protected int correct(int currentOff) {
+    return super.correct(currentOff);
+  }
+
+  @Override
   public int read() throws IOException {
     while(true) {
       if (replacement != null && charPointer < replacement.length()) {
@@ -79,11 +84,15 @@ public class MappingCharFilter extends B
   }
 
   private int nextChar() throws IOException {
-    nextCharCounter++;
     if (buffer != null && !buffer.isEmpty()) {
+      nextCharCounter++;
       return buffer.removeFirst().charValue();
     }
-    return input.read();
+    int nextChar = input.read();
+    if (nextChar != -1) {
+      nextCharCounter++;
+    }
+    return nextChar;
   }
 
   private void pushChar(int c) {
@@ -112,6 +121,8 @@ public class MappingCharFilter extends B
         if (result == null) {
           pushChar(chr);
         }
+      } else {
+        
       }
     }
     if (result == null && map.normStr != null) {

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1326065&r1=1326064&r2=1326065&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java Sat Apr 14 07:32:42 2012
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util._TestUtil;
-import org.junit.Ignore;
 
 public class TestMappingCharFilter extends BaseTokenStreamTestCase {
 
@@ -194,8 +193,8 @@ public class TestMappingCharFilter exten
     int numRounds = RANDOM_MULTIPLIER * 10000;
     checkRandomData(random, analyzer, numRounds);
   }
-  
-  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
+
+  //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
   public void testFinalOffsetSpecialCase() throws Exception {  
     final NormalizeCharMap map = new NormalizeCharMap();
     map.add("t", "");
@@ -219,7 +218,7 @@ public class TestMappingCharFilter exten
     checkAnalysisConsistency(random, analyzer, false, text);
   }
   
-  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
+  //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
   public void testRandomMaps() throws Exception {
     for (int i = 0; i < 100; i++) {
       final NormalizeCharMap map = randomMap();



RE: svn commit: r1326065 - in /lucene/dev/trunk/modules/analysis: CHANGES.txt common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java

Posted by Uwe Schindler <uw...@thetaphi.de>.
> I didn't clean that patch before I committed, sorry. Feel free to remove that
> extra code 

 Done!

> even though I think this entire class should be simply rewritten from
> scratch so it won't matter much.

 I agree, the filter is not easy to understand, and I am not sure about the performance of the overall code design. It looks slow :-)

Uwe


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


Re: svn commit: r1326065 - in /lucene/dev/trunk/modules/analysis: CHANGES.txt common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java

Posted by Dawid Weiss <da...@cs.put.poznan.pl>.
I didn't clean up that patch before I committed it, sorry. Feel free to
remove that extra code, even though I think this entire class should
simply be rewritten from scratch, so it won't matter much.

Dawid

On Sat, Apr 14, 2012 at 11:35 AM, Uwe Schindler <uw...@thetaphi.de> wrote:
> I am a little bit confused about useless code:
>
>   @Override
> +  protected int correct(int currentOff) {
> +    return super.correct(currentOff);
> +  }
> +
> +  @Override
>   public int read() throws IOException {
>
>
> and:
>
>   private void pushChar(int c) {
> @@ -112,6 +121,8 @@ public class MappingCharFilter extends B
>         if (result == null) {
>           pushChar(chr);
>         }
> +      } else {
> +
>       }
>     }
>     if (result == null && map.normStr != null) {
>
>
> Can we remove that again?
>
> Uwe
>
> -----
> Uwe Schindler
> H.-H.-Meier-Allee 63, D-28213 Bremen
> http://www.thetaphi.de
> eMail: uwe@thetaphi.de
>
>
>> -----Original Message-----
>> From: dweiss@apache.org [mailto:dweiss@apache.org]
>> Sent: Saturday, April 14, 2012 9:33 AM
>> To: commits@lucene.apache.org
>> Subject: svn commit: r1326065 - in /lucene/dev/trunk/modules/analysis:
>> CHANGES.txt
>> common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.ja
>> va
>> common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilte
>> r.java
>>
>> Author: dweiss
>> Date: Sat Apr 14 07:32:42 2012
>> New Revision: 1326065
>>
>> URL: http://svn.apache.org/viewvc?rev=1326065&view=rev
>> Log:
>> LUCENE-3971: MappingCharFilter could return invalid final token position.
>> (Dawid Weiss, Robert Muir)
>>
>> Modified:
>>     lucene/dev/trunk/modules/analysis/CHANGES.txt
>>
>> lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/anal
>> ysis/charfilter/MappingCharFilter.java
>>
>> lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analy
>> sis/charfilter/TestMappingCharFilter.java
>>
>> Modified: lucene/dev/trunk/modules/analysis/CHANGES.txt
>> URL:
>> http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/CHANGES.txt
>> ?rev=1326065&r1=1326064&r2=1326065&view=diff
>> ================================================================
>> ==============
>> --- lucene/dev/trunk/modules/analysis/CHANGES.txt (original)
>> +++ lucene/dev/trunk/modules/analysis/CHANGES.txt Sat Apr 14 07:32:42
>> +++ 2012
>> @@ -39,6 +39,9 @@ API Changes
>>
>>  Bug fixes
>>
>> + * LUCENE-3971: MappingCharFilter could return invalid final token position.
>> +   (Dawid Weiss, Robert Muir)
>> +
>>   * LUCENE-3820: PatternReplaceCharFilter could return invalid token positions.
>>     (Dawid Weiss)
>>
>>
>> Modified:
>> lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/anal
>> ysis/charfilter/MappingCharFilter.java
>> URL:
>> http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src
>> /java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java?rev=1326
>> 065&r1=1326064&r2=1326065&view=diff
>> ================================================================
>> ==============
>> ---
>> lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/anal
>> ysis/charfilter/MappingCharFilter.java (original)
>> +++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/
>> +++ analysis/charfilter/MappingCharFilter.java Sat Apr 14 07:32:42 2012
>> @@ -51,6 +51,11 @@ public class MappingCharFilter extends B
>>    }
>>
>>    @Override
>> +  protected int correct(int currentOff) {
>> +    return super.correct(currentOff);
>> +  }
>> +
>> +  @Override
>>    public int read() throws IOException {
>>      while(true) {
>>        if (replacement != null && charPointer < replacement.length()) { @@ -
>> 79,11 +84,15 @@ public class MappingCharFilter extends B
>>    }
>>
>>    private int nextChar() throws IOException {
>> -    nextCharCounter++;
>>      if (buffer != null && !buffer.isEmpty()) {
>> +      nextCharCounter++;
>>        return buffer.removeFirst().charValue();
>>      }
>> -    return input.read();
>> +    int nextChar = input.read();
>> +    if (nextChar != -1) {
>> +      nextCharCounter++;
>> +    }
>> +    return nextChar;
>>    }
>>
>>    private void pushChar(int c) {
>> @@ -112,6 +121,8 @@ public class MappingCharFilter extends B
>>          if (result == null) {
>>            pushChar(chr);
>>          }
>> +      } else {
>> +
>>        }
>>      }
>>      if (result == null && map.normStr != null) {
>>
>> Modified:
>> lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analy
>> sis/charfilter/TestMappingCharFilter.java
>> URL:
>> http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src
>> /test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1
>> 326065&r1=1326064&r2=1326065&view=diff
>> ================================================================
>> ==============
>> ---
>> lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analy
>> sis/charfilter/TestMappingCharFilter.java (original)
>> +++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/
>> +++ analysis/charfilter/TestMappingCharFilter.java Sat Apr 14 07:32:42
>> +++ 2012
>> @@ -30,7 +30,6 @@ import org.apache.lucene.analysis.MockTo  import
>> org.apache.lucene.analysis.TokenStream;
>>  import org.apache.lucene.analysis.Tokenizer;
>>  import org.apache.lucene.util._TestUtil; -import org.junit.Ignore;
>>
>>  public class TestMappingCharFilter extends BaseTokenStreamTestCase {
>>
>> @@ -194,8 +193,8 @@ public class TestMappingCharFilter exten
>>      int numRounds = RANDOM_MULTIPLIER * 10000;
>>      checkRandomData(random, analyzer, numRounds);
>>    }
>> -
>> -  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-
>> 3971")
>> +
>> +  //@Ignore("wrong finalOffset:
>> + https://issues.apache.org/jira/browse/LUCENE-3971")
>>    public void testFinalOffsetSpecialCase() throws Exception {
>>      final NormalizeCharMap map = new NormalizeCharMap();
>>      map.add("t", "");
>> @@ -219,7 +218,7 @@ public class TestMappingCharFilter exten
>>      checkAnalysisConsistency(random, analyzer, false, text);
>>    }
>>
>> -  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-
>> 3971")
>> +  //@Ignore("wrong finalOffset:
>> + https://issues.apache.org/jira/browse/LUCENE-3971")
>>    public void testRandomMaps() throws Exception {
>>      for (int i = 0; i < 100; i++) {
>>        final NormalizeCharMap map = randomMap();
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
> For additional commands, e-mail: dev-help@lucene.apache.org
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


RE: svn commit: r1326065 - in /lucene/dev/trunk/modules/analysis: CHANGES.txt common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java

Posted by Uwe Schindler <uw...@thetaphi.de>.
I am a little bit confused about some apparently useless code in this commit:

   @Override
+  protected int correct(int currentOff) {
+    return super.correct(currentOff);
+  }
+
+  @Override
   public int read() throws IOException {


and:

   private void pushChar(int c) {
@@ -112,6 +121,8 @@ public class MappingCharFilter extends B
         if (result == null) {
           pushChar(chr);
         }
+      } else {
+        
       }
     }
     if (result == null && map.normStr != null) {


Can we remove that again?

Uwe

-----
Uwe Schindler
H.-H.-Meier-Allee 63, D-28213 Bremen
http://www.thetaphi.de
eMail: uwe@thetaphi.de


> -----Original Message-----
> From: dweiss@apache.org [mailto:dweiss@apache.org]
> Sent: Saturday, April 14, 2012 9:33 AM
> To: commits@lucene.apache.org
> Subject: svn commit: r1326065 - in /lucene/dev/trunk/modules/analysis:
> CHANGES.txt
> common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.ja
> va
> common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilte
> r.java
> 
> Author: dweiss
> Date: Sat Apr 14 07:32:42 2012
> New Revision: 1326065
> 
> URL: http://svn.apache.org/viewvc?rev=1326065&view=rev
> Log:
> LUCENE-3971: MappingCharFilter could return invalid final token position.
> (Dawid Weiss, Robert Muir)
> 
> Modified:
>     lucene/dev/trunk/modules/analysis/CHANGES.txt
> 
> lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/anal
> ysis/charfilter/MappingCharFilter.java
> 
> lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analy
> sis/charfilter/TestMappingCharFilter.java
> 
> Modified: lucene/dev/trunk/modules/analysis/CHANGES.txt
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/CHANGES.txt
> ?rev=1326065&r1=1326064&r2=1326065&view=diff
> ================================================================
> ==============
> --- lucene/dev/trunk/modules/analysis/CHANGES.txt (original)
> +++ lucene/dev/trunk/modules/analysis/CHANGES.txt Sat Apr 14 07:32:42
> +++ 2012
> @@ -39,6 +39,9 @@ API Changes
> 
>  Bug fixes
> 
> + * LUCENE-3971: MappingCharFilter could return invalid final token position.
> +   (Dawid Weiss, Robert Muir)
> +
>   * LUCENE-3820: PatternReplaceCharFilter could return invalid token positions.
>     (Dawid Weiss)
> 
> 
> Modified:
> lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/anal
> ysis/charfilter/MappingCharFilter.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src
> /java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java?rev=1326
> 065&r1=1326064&r2=1326065&view=diff
> ================================================================
> ==============
> ---
> lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/anal
> ysis/charfilter/MappingCharFilter.java (original)
> +++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/
> +++ analysis/charfilter/MappingCharFilter.java Sat Apr 14 07:32:42 2012
> @@ -51,6 +51,11 @@ public class MappingCharFilter extends B
>    }
> 
>    @Override
> +  protected int correct(int currentOff) {
> +    return super.correct(currentOff);
> +  }
> +
> +  @Override
>    public int read() throws IOException {
>      while(true) {
>        if (replacement != null && charPointer < replacement.length()) { @@ -
> 79,11 +84,15 @@ public class MappingCharFilter extends B
>    }
> 
>    private int nextChar() throws IOException {
> -    nextCharCounter++;
>      if (buffer != null && !buffer.isEmpty()) {
> +      nextCharCounter++;
>        return buffer.removeFirst().charValue();
>      }
> -    return input.read();
> +    int nextChar = input.read();
> +    if (nextChar != -1) {
> +      nextCharCounter++;
> +    }
> +    return nextChar;
>    }
> 
>    private void pushChar(int c) {
> @@ -112,6 +121,8 @@ public class MappingCharFilter extends B
>          if (result == null) {
>            pushChar(chr);
>          }
> +      } else {
> +
>        }
>      }
>      if (result == null && map.normStr != null) {
> 
> Modified:
> lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analy
> sis/charfilter/TestMappingCharFilter.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src
> /test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1
> 326065&r1=1326064&r2=1326065&view=diff
> ================================================================
> ==============
> ---
> lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analy
> sis/charfilter/TestMappingCharFilter.java (original)
> +++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/
> +++ analysis/charfilter/TestMappingCharFilter.java Sat Apr 14 07:32:42
> +++ 2012
> @@ -30,7 +30,6 @@ import org.apache.lucene.analysis.MockTo  import
> org.apache.lucene.analysis.TokenStream;
>  import org.apache.lucene.analysis.Tokenizer;
>  import org.apache.lucene.util._TestUtil; -import org.junit.Ignore;
> 
>  public class TestMappingCharFilter extends BaseTokenStreamTestCase {
> 
> @@ -194,8 +193,8 @@ public class TestMappingCharFilter exten
>      int numRounds = RANDOM_MULTIPLIER * 10000;
>      checkRandomData(random, analyzer, numRounds);
>    }
> -
> -  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-
> 3971")
> +
> +  //@Ignore("wrong finalOffset:
> + https://issues.apache.org/jira/browse/LUCENE-3971")
>    public void testFinalOffsetSpecialCase() throws Exception {
>      final NormalizeCharMap map = new NormalizeCharMap();
>      map.add("t", "");
> @@ -219,7 +218,7 @@ public class TestMappingCharFilter exten
>      checkAnalysisConsistency(random, analyzer, false, text);
>    }
> 
> -  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-
> 3971")
> +  //@Ignore("wrong finalOffset:
> + https://issues.apache.org/jira/browse/LUCENE-3971")
>    public void testRandomMaps() throws Exception {
>      for (int i = 0; i < 100; i++) {
>        final NormalizeCharMap map = randomMap();



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org