You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by Koji Sekiguchi <ko...@r.email.ne.jp> on 2010/09/27 18:42:07 UTC

Re: svn commit: r1001796 - in /lucene/dev/trunk/lucene: ./ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/ contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ src/java/org/apache/lucene/index/ src/test/org/apache/l...

  I'll backport to 3x tomorrow.

Koji

-- 
http://www.rondhuit.com/en/

(10/09/28 1:10), koji@apache.org wrote:
> Author: koji
> Date: Mon Sep 27 16:10:29 2010
> New Revision: 1001796
>
> URL: http://svn.apache.org/viewvc?rev=1001796&view=rev
> Log:
> LUCENE-2529, LUCENE-2668: always apply position increment gap and offset gap between values
>
> Modified:
>      lucene/dev/trunk/lucene/CHANGES.txt
>      lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
>      lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
>      lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
>      lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
>      lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
>      lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
>      lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
>      lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
>      lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
>      lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
>
> Modified: lucene/dev/trunk/lucene/CHANGES.txt
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/CHANGES.txt (original)
> +++ lucene/dev/trunk/lucene/CHANGES.txt Mon Sep 27 16:10:29 2010
> @@ -108,6 +108,11 @@ Changes in backwards compatibility polic
>   * LUCENE-2600: Remove IndexReader.isDeleted in favor of
>     IndexReader.getDeletedDocs().  (Mike McCandless)
>
> +* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty
> +  values in multi-valued fields have been changed for some cases in the index.
> +  If you index empty fields and use positions/offsets information on those
> +  fields, reindexing is recommended. (David Smiley, Koji Sekiguchi)
> +
>   Changes in Runtime Behavior
>
>   * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Mon Sep 27 16:10:29 2010
> @@ -156,8 +156,7 @@ public abstract class BaseFragmentsBuild
>         int startOffset, int endOffset ){
>       while( buffer.length()<  endOffset&&  index[0]<  values.length ){
>         buffer.append( values[index[0]] );
> -      if( values[index[0]].length()>  0&&  index[0] + 1<  values.length )
> -        buffer.append( multiValuedSeparator );
> +      buffer.append( multiValuedSeparator );
>         index[0]++;
>       }
>       int eo = buffer.length()<  endOffset ? buffer.length() : endOffset;
> @@ -168,7 +167,7 @@ public abstract class BaseFragmentsBuild
>         int startOffset, int endOffset ){
>       while( buffer.length()<  endOffset&&  index[0]<  values.length ){
>         buffer.append( values[index[0]].stringValue() );
> -      if( values[index[0]].isTokenized()&&  values[index[0]].stringValue().length()>  0&&  index[0] + 1<  values.length )
> +      if( values[index[0]].isTokenized() )
>           buffer.append( multiValuedSeparator );
>         index[0]++;
>       }
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Mon Sep 27 16:10:29 2010
> @@ -355,16 +355,20 @@ public abstract class AbstractTestCase e
>
>     protected void makeIndexShortMV() throws Exception {
>
> +    //  0
>       // ""
> +    //  1
>       // ""
>
> -    //  012345
> +    //  234567
>       // "a b c"
>       //  0 1 2
> -
> +
> +    //  8
>       // ""
>
> -    //  6789
> +    //   111
> +    //  9012
>       // "d e"
>       //  3 4
>       make1dmfIndex( shortMVValues );
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java Mon Sep 27 16:10:29 2010
> @@ -165,7 +165,7 @@ public class FieldPhraseListTest extends
>       FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
>       FieldPhraseList fpl = new FieldPhraseList( stack, fq );
>       assertEquals( 1, fpl.phraseList.size() );
> -    assertEquals( "d(1.0)((6,7))", fpl.phraseList.get( 0 ).toString() );
> +    assertEquals( "d(1.0)((9,10))", fpl.phraseList.get( 0 ).toString() );
>     }
>
>     public void test1PhraseLongMV() throws Exception {
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java Mon Sep 27 16:10:29 2010
> @@ -132,7 +132,7 @@ public class FieldTermStackTest extends
>       FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
>       FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
>       assertEquals( 1, stack.termList.size() );
> -    assertEquals( "d(6,7,3)", stack.pop().toString() );
> +    assertEquals( "d(9,10,3)", stack.pop().toString() );
>     }
>
>     public void test1PhraseLongMV() throws Exception {
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java Mon Sep 27 16:10:29 2010
> @@ -27,7 +27,7 @@ public class ScoreOrderFragmentsBuilderT
>       String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
>       assertEquals( 3, f.length );
>       // check score order
> -    assertEquals( "<b>c</b>  <b>a</b>  <b>a</b>  b b", f[0] );
> +    assertEquals( "<b>c</b>  <b>a</b>  <b>a</b>  b b ", f[0] );
>       assertEquals( "b b<b>a</b>  b<b>a</b>  b b b b b ", f[1] );
>       assertEquals( "<b>a</b>  b b b b b b b b b ", f[2] );
>     }
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java Mon Sep 27 16:10:29 2010
> @@ -143,7 +143,7 @@ public class SimpleFragListBuilderTest e
>       SimpleFragListBuilder sflb = new SimpleFragListBuilder();
>       FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
>       assertEquals( 1, ffl.fragInfos.size() );
> -    assertEquals( "subInfos=(d((6,7)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
> +    assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
>     }
>
>     public void test1PhraseLongMV() throws Exception {
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Mon Sep 27 16:10:29 2010
> @@ -34,11 +34,11 @@ public class SimpleFragmentsBuilderTest
>     public void test1TermIndex() throws Exception {
>       FieldFragList ffl = ffl( "a", "a" );
>       SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
> -    assertEquals( "<b>a</b>", sfb.createFragment( reader, 0, F, ffl ) );
> +    assertEquals( "<b>a</b>  ", sfb.createFragment( reader, 0, F, ffl ) );
>
>       // change tags
>       sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } );
> -    assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) );
> +    assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) );
>     }
>
>     public void test2Frags() throws Exception {
> @@ -48,7 +48,7 @@ public class SimpleFragmentsBuilderTest
>       // 3 snippets requested, but should be 2
>       assertEquals( 2, f.length );
>       assertEquals( "<b>a</b>  b b b b b b b b b ", f[0] );
> -    assertEquals( "b b<b>a</b>  b<b>a</b>  b", f[1] );
> +    assertEquals( "b b<b>a</b>  b<b>a</b>  b ", f[1] );
>     }
>
>     public void test3Frags() throws Exception {
> @@ -58,7 +58,7 @@ public class SimpleFragmentsBuilderTest
>       assertEquals( 3, f.length );
>       assertEquals( "<b>a</b>  b b b b b b b b b ", f[0] );
>       assertEquals( "b b<b>a</b>  b<b>a</b>  b b b b b ", f[1] );
> -    assertEquals( "<b>c</b>  <b>a</b>  <b>a</b>  b b", f[2] );
> +    assertEquals( "<b>c</b>  <b>a</b>  <b>a</b>  b b ", f[2] );
>     }
>
>     public void testTagsAndEncoder() throws Exception {
> @@ -66,7 +66,7 @@ public class SimpleFragmentsBuilderTest
>       SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
>       String[] preTags = { "[" };
>       String[] postTags = { "]" };
> -    assertEquals( "&lt;h1&gt; [a]&lt;/h1&gt;",
> +    assertEquals( "&lt;h1&gt; [a]&lt;/h1&gt; ",
>           sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
>     }
>
> @@ -88,7 +88,7 @@ public class SimpleFragmentsBuilderTest
>       SimpleFragListBuilder sflb = new SimpleFragListBuilder();
>       FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
>       SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
> -    assertEquals( "a b c<b>d</b>  e", sfb.createFragment( reader, 0, F, ffl ) );
> +    assertEquals( " b c<b>d</b>  e ", sfb.createFragment( reader, 0, F, ffl ) );
>     }
>
>     public void test1PhraseLongMV() throws Exception {
> @@ -113,7 +113,7 @@ public class SimpleFragmentsBuilderTest
>       SimpleFragListBuilder sflb = new SimpleFragListBuilder();
>       FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
>       SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
> -    assertEquals( "ssing<b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
> +    assertEquals( "ssing<b>speed</b>, the ", sfb.createFragment( reader, 0, F, ffl ) );
>     }
>
>     public void testUnstoredField() throws Exception {
> @@ -163,6 +163,6 @@ public class SimpleFragmentsBuilderTest
>       FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
>       SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
>       sfb.setMultiValuedSeparator( '/' );
> -    assertEquals( "a b c/<b>d</b>  e", sfb.createFragment( reader, 0, F, ffl ) );
> +    assertEquals( " b c//<b>d</b>  e/", sfb.createFragment( reader, 0, F, ffl ) );
>     }
>   }
>
> Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
> +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java Mon Sep 27 16:10:29 2010
> @@ -75,10 +75,8 @@ final class DocInverterPerField extends
>         // consumer if it wants to see this particular field
>         // tokenized.
>         if (field.isIndexed()&&  doInvert) {
> -
> -        final boolean anyToken;
>
> -        if (fieldState.length>  0)
> +        if (i>  0)
>             fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
>
>           if (!field.isTokenized()) {		  // un-tokenized field
> @@ -99,7 +97,6 @@ final class DocInverterPerField extends
>             fieldState.offset += valueLength;
>             fieldState.length++;
>             fieldState.position++;
> -          anyToken = valueLength>  0;
>           } else {                                  // tokenized field
>             final TokenStream stream;
>             final TokenStream streamValue = field.tokenStreamValue();
> @@ -189,14 +186,12 @@ final class DocInverterPerField extends
>               stream.end();
>
>               fieldState.offset += offsetAttribute.endOffset();
> -            anyToken = fieldState.length>  startLength;
>             } finally {
>               stream.close();
>             }
>           }
>
> -        if (anyToken)
> -          fieldState.offset += docState.analyzer.getOffsetGap(field);
> +        fieldState.offset += docState.analyzer.getOffsetGap(field);
>           fieldState.boost *= field.getBoost();
>         }
>
>
> Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java (original)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java Mon Sep 27 16:10:29 2010
> @@ -30,6 +30,7 @@ public final class MockAnalyzer extends
>     private final boolean lowerCase;
>     private final CharacterRunAutomaton filter;
>     private final boolean enablePositionIncrements;
> +  private int positionIncrementGap;
>
>     /**
>      * Creates a new MockAnalyzer.
> @@ -89,4 +90,13 @@ public final class MockAnalyzer extends
>         return saved.filter;
>       }
>     }
> +
> +  public void setPositionIncrementGap(int positionIncrementGap){
> +    this.positionIncrementGap = positionIncrementGap;
> +  }
> +
> +  @Override
> +  public int getPositionIncrementGap(String fieldName){
> +    return positionIncrementGap;
> +  }
>   }
> \ No newline at end of file
>
> Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Mon Sep 27 16:10:29 2010
> @@ -4266,11 +4266,11 @@ public class TestIndexWriter extends Luc
>       TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
>       TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
>       assertEquals(1, termOffsets.length);
> -    assertEquals(0, termOffsets[0].getStartOffset());
> -    assertEquals(6, termOffsets[0].getEndOffset());
> +    assertEquals(1, termOffsets[0].getStartOffset());
> +    assertEquals(7, termOffsets[0].getEndOffset());
>       termOffsets = tpv.getOffsets(1);
> -    assertEquals(7, termOffsets[0].getStartOffset());
> -    assertEquals(10, termOffsets[0].getEndOffset());
> +    assertEquals(8, termOffsets[0].getStartOffset());
> +    assertEquals(11, termOffsets[0].getEndOffset());
>       r.close();
>       dir.close();
>     }
> @@ -4301,8 +4301,37 @@ public class TestIndexWriter extends Luc
>       assertEquals(0, termOffsets[0].getStartOffset());
>       assertEquals(4, termOffsets[0].getEndOffset());
>       termOffsets = tpv.getOffsets(1);
> -    assertEquals(5, termOffsets[0].getStartOffset());
> -    assertEquals(11, termOffsets[0].getEndOffset());
> +    assertEquals(6, termOffsets[0].getStartOffset());
> +    assertEquals(12, termOffsets[0].getEndOffset());
> +    r.close();
> +    dir.close();
> +  }
> +
> +  // LUCENE-2529
> +  public void testPositionIncrementGapEmptyField() throws Exception {
> +    Directory dir = newDirectory();
> +    MockAnalyzer analyzer = new MockAnalyzer();
> +    analyzer.setPositionIncrementGap( 100 );
> +    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
> +        TEST_VERSION_CURRENT, analyzer));
> +    Document doc = new Document();
> +    Field f = newField("field", "", Field.Store.NO,
> +                        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
> +    Field f2 = newField("field", "crunch man", Field.Store.NO,
> +        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
> +    doc.add(f);
> +    doc.add(f2);
> +    w.addDocument(doc);
> +    w.close();
> +
> +    IndexReader r = IndexReader.open(dir, true);
> +    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
> +    int[] poss = tpv.getTermPositions(0);
> +    assertEquals(1, poss.length);
> +    assertEquals(100, poss[0]);
> +    poss = tpv.getTermPositions(1);
> +    assertEquals(1, poss.length);
> +    assertEquals(101, poss[0]);
>       r.close();
>       dir.close();
>     }
>
>
>


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org