You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by Koji Sekiguchi <ko...@r.email.ne.jp> on 2010/09/27 18:42:07 UTC
Re: svn commit: r1001796 - in /lucene/dev/trunk/lucene: ./ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/
contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ src/java/org/apache/lucene/index/
src/test/org/apache/l...
I'll backport this to the 3.x branch tomorrow.
Koji
--
http://www.rondhuit.com/en/
(10/09/28 1:10), koji@apache.org wrote:
> Author: koji
> Date: Mon Sep 27 16:10:29 2010
> New Revision: 1001796
>
> URL: http://svn.apache.org/viewvc?rev=1001796&view=rev
> Log:
> LUCENE-2529, LUCENE-2668: always apply position increment gap and offset gap between values
>
> Modified:
> lucene/dev/trunk/lucene/CHANGES.txt
> lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
> lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
> lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
> lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
> lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
> lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
> lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
> lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
> lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
>
> Modified: lucene/dev/trunk/lucene/CHANGES.txt
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/CHANGES.txt (original)
> +++ lucene/dev/trunk/lucene/CHANGES.txt Mon Sep 27 16:10:29 2010
> @@ -108,6 +108,11 @@ Changes in backwards compatibility polic
> * LUCENE-2600: Remove IndexReader.isDeleted in favor of
> IndexReader.getDeletedDocs(). (Mike McCandless)
>
> +* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty
> + values in multi-valued fields have been changed for some cases in the index.
> + If you index empty fields and use positions/offsets information on those
> + fields, reindexing is recommended. (David Smiley, Koji Sekiguchi)
> +
> Changes in Runtime Behavior
>
> * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Mon Sep 27 16:10:29 2010
> @@ -156,8 +156,7 @@ public abstract class BaseFragmentsBuild
> int startOffset, int endOffset ){
> while( buffer.length()< endOffset&& index[0]< values.length ){
> buffer.append( values[index[0]] );
> - if( values[index[0]].length()> 0&& index[0] + 1< values.length )
> - buffer.append( multiValuedSeparator );
> + buffer.append( multiValuedSeparator );
> index[0]++;
> }
> int eo = buffer.length()< endOffset ? buffer.length() : endOffset;
> @@ -168,7 +167,7 @@ public abstract class BaseFragmentsBuild
> int startOffset, int endOffset ){
> while( buffer.length()< endOffset&& index[0]< values.length ){
> buffer.append( values[index[0]].stringValue() );
> - if( values[index[0]].isTokenized()&& values[index[0]].stringValue().length()> 0&& index[0] + 1< values.length )
> + if( values[index[0]].isTokenized() )
> buffer.append( multiValuedSeparator );
> index[0]++;
> }
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Mon Sep 27 16:10:29 2010
> @@ -355,16 +355,20 @@ public abstract class AbstractTestCase e
>
> protected void makeIndexShortMV() throws Exception {
>
> + // 0
> // ""
> + // 1
> // ""
>
> - // 012345
> + // 234567
> // "a b c"
> // 0 1 2
> -
> +
> + // 8
> // ""
>
> - // 6789
> + // 111
> + // 9012
> // "d e"
> // 3 4
> make1dmfIndex( shortMVValues );
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java Mon Sep 27 16:10:29 2010
> @@ -165,7 +165,7 @@ public class FieldPhraseListTest extends
> FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
> FieldPhraseList fpl = new FieldPhraseList( stack, fq );
> assertEquals( 1, fpl.phraseList.size() );
> - assertEquals( "d(1.0)((6,7))", fpl.phraseList.get( 0 ).toString() );
> + assertEquals( "d(1.0)((9,10))", fpl.phraseList.get( 0 ).toString() );
> }
>
> public void test1PhraseLongMV() throws Exception {
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java Mon Sep 27 16:10:29 2010
> @@ -132,7 +132,7 @@ public class FieldTermStackTest extends
> FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
> FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
> assertEquals( 1, stack.termList.size() );
> - assertEquals( "d(6,7,3)", stack.pop().toString() );
> + assertEquals( "d(9,10,3)", stack.pop().toString() );
> }
>
> public void test1PhraseLongMV() throws Exception {
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java Mon Sep 27 16:10:29 2010
> @@ -27,7 +27,7 @@ public class ScoreOrderFragmentsBuilderT
> String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
> assertEquals( 3, f.length );
> // check score order
> - assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[0] );
> + assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[0] );
> assertEquals( "b b<b>a</b> b<b>a</b> b b b b b ", f[1] );
> assertEquals( "<b>a</b> b b b b b b b b b ", f[2] );
> }
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java Mon Sep 27 16:10:29 2010
> @@ -143,7 +143,7 @@ public class SimpleFragListBuilderTest e
> SimpleFragListBuilder sflb = new SimpleFragListBuilder();
> FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
> assertEquals( 1, ffl.fragInfos.size() );
> - assertEquals( "subInfos=(d((6,7)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
> + assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
> }
>
> public void test1PhraseLongMV() throws Exception {
>
> Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
> +++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Mon Sep 27 16:10:29 2010
> @@ -34,11 +34,11 @@ public class SimpleFragmentsBuilderTest
> public void test1TermIndex() throws Exception {
> FieldFragList ffl = ffl( "a", "a" );
> SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
> - assertEquals( "<b>a</b>", sfb.createFragment( reader, 0, F, ffl ) );
> + assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
>
> // change tags
> sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } );
> - assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) );
> + assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) );
> }
>
> public void test2Frags() throws Exception {
> @@ -48,7 +48,7 @@ public class SimpleFragmentsBuilderTest
> // 3 snippets requested, but should be 2
> assertEquals( 2, f.length );
> assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
> - assertEquals( "b b<b>a</b> b<b>a</b> b", f[1] );
> + assertEquals( "b b<b>a</b> b<b>a</b> b ", f[1] );
> }
>
> public void test3Frags() throws Exception {
> @@ -58,7 +58,7 @@ public class SimpleFragmentsBuilderTest
> assertEquals( 3, f.length );
> assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
> assertEquals( "b b<b>a</b> b<b>a</b> b b b b b ", f[1] );
> - assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[2] );
> + assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[2] );
> }
>
> public void testTagsAndEncoder() throws Exception {
> @@ -66,7 +66,7 @@ public class SimpleFragmentsBuilderTest
> SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
> String[] preTags = { "[" };
> String[] postTags = { "]" };
> - assertEquals( "<h1> [a]</h1>",
> + assertEquals( "<h1> [a]</h1> ",
> sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
> }
>
> @@ -88,7 +88,7 @@ public class SimpleFragmentsBuilderTest
> SimpleFragListBuilder sflb = new SimpleFragListBuilder();
> FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
> SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
> - assertEquals( "a b c<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
> + assertEquals( " b c<b>d</b> e ", sfb.createFragment( reader, 0, F, ffl ) );
> }
>
> public void test1PhraseLongMV() throws Exception {
> @@ -113,7 +113,7 @@ public class SimpleFragmentsBuilderTest
> SimpleFragListBuilder sflb = new SimpleFragListBuilder();
> FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
> SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
> - assertEquals( "ssing<b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
> + assertEquals( "ssing<b>speed</b>, the ", sfb.createFragment( reader, 0, F, ffl ) );
> }
>
> public void testUnstoredField() throws Exception {
> @@ -163,6 +163,6 @@ public class SimpleFragmentsBuilderTest
> FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
> SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
> sfb.setMultiValuedSeparator( '/' );
> - assertEquals( "a b c/<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
> + assertEquals( " b c//<b>d</b> e/", sfb.createFragment( reader, 0, F, ffl ) );
> }
> }
>
> Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
> +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java Mon Sep 27 16:10:29 2010
> @@ -75,10 +75,8 @@ final class DocInverterPerField extends
> // consumer if it wants to see this particular field
> // tokenized.
> if (field.isIndexed()&& doInvert) {
> -
> - final boolean anyToken;
>
> - if (fieldState.length> 0)
> + if (i> 0)
> fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
>
> if (!field.isTokenized()) { // un-tokenized field
> @@ -99,7 +97,6 @@ final class DocInverterPerField extends
> fieldState.offset += valueLength;
> fieldState.length++;
> fieldState.position++;
> - anyToken = valueLength> 0;
> } else { // tokenized field
> final TokenStream stream;
> final TokenStream streamValue = field.tokenStreamValue();
> @@ -189,14 +186,12 @@ final class DocInverterPerField extends
> stream.end();
>
> fieldState.offset += offsetAttribute.endOffset();
> - anyToken = fieldState.length> startLength;
> } finally {
> stream.close();
> }
> }
>
> - if (anyToken)
> - fieldState.offset += docState.analyzer.getOffsetGap(field);
> + fieldState.offset += docState.analyzer.getOffsetGap(field);
> fieldState.boost *= field.getBoost();
> }
>
>
> Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java (original)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java Mon Sep 27 16:10:29 2010
> @@ -30,6 +30,7 @@ public final class MockAnalyzer extends
> private final boolean lowerCase;
> private final CharacterRunAutomaton filter;
> private final boolean enablePositionIncrements;
> + private int positionIncrementGap;
>
> /**
> * Creates a new MockAnalyzer.
> @@ -89,4 +90,13 @@ public final class MockAnalyzer extends
> return saved.filter;
> }
> }
> +
> + public void setPositionIncrementGap(int positionIncrementGap){
> + this.positionIncrementGap = positionIncrementGap;
> + }
> +
> + @Override
> + public int getPositionIncrementGap(String fieldName){
> + return positionIncrementGap;
> + }
> }
> \ No newline at end of file
>
> Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1001796&r1=1001795&r2=1001796&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Mon Sep 27 16:10:29 2010
> @@ -4266,11 +4266,11 @@ public class TestIndexWriter extends Luc
> TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
> TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
> assertEquals(1, termOffsets.length);
> - assertEquals(0, termOffsets[0].getStartOffset());
> - assertEquals(6, termOffsets[0].getEndOffset());
> + assertEquals(1, termOffsets[0].getStartOffset());
> + assertEquals(7, termOffsets[0].getEndOffset());
> termOffsets = tpv.getOffsets(1);
> - assertEquals(7, termOffsets[0].getStartOffset());
> - assertEquals(10, termOffsets[0].getEndOffset());
> + assertEquals(8, termOffsets[0].getStartOffset());
> + assertEquals(11, termOffsets[0].getEndOffset());
> r.close();
> dir.close();
> }
> @@ -4301,8 +4301,37 @@ public class TestIndexWriter extends Luc
> assertEquals(0, termOffsets[0].getStartOffset());
> assertEquals(4, termOffsets[0].getEndOffset());
> termOffsets = tpv.getOffsets(1);
> - assertEquals(5, termOffsets[0].getStartOffset());
> - assertEquals(11, termOffsets[0].getEndOffset());
> + assertEquals(6, termOffsets[0].getStartOffset());
> + assertEquals(12, termOffsets[0].getEndOffset());
> + r.close();
> + dir.close();
> + }
> +
> + // LUCENE-2529
> + public void testPositionIncrementGapEmptyField() throws Exception {
> + Directory dir = newDirectory();
> + MockAnalyzer analyzer = new MockAnalyzer();
> + analyzer.setPositionIncrementGap( 100 );
> + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
> + TEST_VERSION_CURRENT, analyzer));
> + Document doc = new Document();
> + Field f = newField("field", "", Field.Store.NO,
> + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
> + Field f2 = newField("field", "crunch man", Field.Store.NO,
> + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
> + doc.add(f);
> + doc.add(f2);
> + w.addDocument(doc);
> + w.close();
> +
> + IndexReader r = IndexReader.open(dir, true);
> + TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
> + int[] poss = tpv.getTermPositions(0);
> + assertEquals(1, poss.length);
> + assertEquals(100, poss[0]);
> + poss = tpv.getTermPositions(1);
> + assertEquals(1, poss.length);
> + assertEquals(101, poss[0]);
> r.close();
> dir.close();
> }
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org