You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/01 11:42:30 UTC

svn commit: r1239044 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/src/test-framework/java/org/apache/lucene/analysis/ lucene/src/test/org/apache/lucene/analysis/

Author: rmuir
Date: Wed Feb  1 10:42:29 2012
New Revision: 1239044

URL: http://svn.apache.org/viewvc?rev=1239044&view=rev
Log:
LUCENE-3741: MockCharFilter offset correction is wrong

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java

Modified: lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java?rev=1239044&r1=1239043&r2=1239044&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java Wed Feb  1 10:42:29 2012
@@ -52,7 +52,7 @@ class MockCharFilter extends CharStream 
       bufferedCh = -1;
       currentOffset++;
       
-      addOffCorrectMap(currentOffset+delta, delta-1);
+      addOffCorrectMap(currentOffset, delta-1);
       delta--;
       return ch;
     }

Modified: lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1239044&r1=1239043&r2=1239044&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java Wed Feb  1 10:42:29 2012
@@ -69,6 +69,7 @@ public class MockTokenizer extends Token
   };
   
   private State streamState = State.CLOSE;
+  private int lastOffset = 0; // only for asserting
   private boolean enableChecks = true;
   
   public MockTokenizer(AttributeFactory factory, Reader input, int pattern, boolean lowerCase, int maxTokenLength) {
@@ -109,7 +110,15 @@ public class MockTokenizer extends Token
           }
           cp = readCodePoint();
         } while (cp >= 0 && isTokenChar(cp));
-        offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
+        
+        int correctedStartOffset = correctOffset(startOffset);
+        int correctedEndOffset = correctOffset(endOffset);
+        assert correctedStartOffset >= 0;
+        assert correctedEndOffset >= 0;
+        assert correctedStartOffset >= lastOffset;
+        lastOffset = correctedStartOffset;
+        assert correctedEndOffset >= correctedStartOffset;
+        offsetAtt.setOffset(correctedStartOffset, correctedEndOffset);
         streamState = State.INCREMENT;
         return true;
       }
@@ -157,7 +166,7 @@ public class MockTokenizer extends Token
   @Override
   public void reset() throws IOException {
     super.reset();
-    off = 0;
+    lastOffset = off = 0;
     assert !enableChecks || streamState != State.RESET : "double reset()";
     streamState = State.RESET;
   }

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1239044&r1=1239043&r2=1239044&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Wed Feb  1 10:42:29 2012
@@ -2,6 +2,8 @@ package org.apache.lucene.analysis;
 
 import java.io.StringReader;
 
+import org.apache.lucene.util._TestUtil;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -87,4 +89,21 @@ public class TestMockAnalyzer extends Ba
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new MockAnalyzer(random), atLeast(1000));
   }
+  
+  public void testForwardOffsets() throws Exception {
+    int num = atLeast(10000);
+    for (int i = 0; i < num; i++) {
+      String s = _TestUtil.randomHtmlishString(random, 20);
+      StringReader reader = new StringReader(s);
+      MockCharFilter charfilter = new MockCharFilter(CharReader.get(reader), 2);
+      MockAnalyzer analyzer = new MockAnalyzer(random);
+      TokenStream ts = analyzer.tokenStream("bogus", charfilter);
+      ts.reset();
+      while (ts.incrementToken()) {
+        ;
+      }
+      ts.end();
+      ts.close();
+    }
+  }
 }