You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucene.apache.org by mv...@apache.org on 2017/06/08 10:23:01 UTC

lucene-solr:branch_6_6: LUCENE-7869: Changed MemoryIndex to sort 1d points.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6_6 0238eeef2 -> 703053094


LUCENE-7869: Changed MemoryIndex to sort 1d points.

For 1d points, the PointInSetQuery.MergePointVisitor expects the points to be visited in ascending order.
Prior to this change, the memory index did not do this, which could cause a document with multiple points that should match to not match.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/70305309
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/70305309
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/70305309

Branch: refs/heads/branch_6_6
Commit: 70305309471e906c8f64e4203aba475445278938
Parents: 0238eee
Author: Martijn van Groningen <ma...@gmail.com>
Authored: Wed Jun 7 19:55:32 2017 +0200
Committer: Martijn van Groningen <ma...@gmail.com>
Committed: Thu Jun 8 12:15:30 2017 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              | 29 +++++-----------
 .../apache/lucene/index/memory/MemoryIndex.java | 35 ++++++++++++--------
 .../lucene/index/memory/TestMemoryIndex.java    | 24 ++++++++++++++
 3 files changed, 54 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f1bddde..1dfebfe 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -4,7 +4,12 @@ For more information on past and future Lucene versions, please see:
 http://s.apache.org/luceneversions
 
 ======================= Lucene 6.6.1 =======================
-(No Changes)
+
+Bug Fixes
+
+* LUCENE-7869: Changed MemoryIndex to sort 1d points. In case of 1d points, the PointInSetQuery.MergePointVisitor expects
+  that these points are visited in ascending order. The memory index doesn't do this and this can result in document
+  with multiple points that should match to not match. (Martijn van Groningen)
 
 ======================= Lucene 6.6.0 =======================
 
@@ -102,9 +107,6 @@ Other
 * LUCENE-7761: Fixed comment in ReqExclScorer.
   (Pablo Pita Leira via Adrien Grand)
 
-* LUCENE-7815: Deprecate the PostingsHighlighter. It evolved into the
-  UnifiedHighlighter. (David Smiley)
-
 ======================= Lucene 6.5.1 =======================
 
 Bug Fixes
@@ -118,9 +120,6 @@ Bug Fixes
 * LUCENE-7769: The UnifiedHighligter wasn't highlighting portions of the query
   wrapped in BoostQuery or SpanBoostQuery. (David Smiley, Dmitry Malinin)
 
-* LUCENE-7791: Fixed index sorting to work with sparse numeric and binary docvalues field.
-  (Przemyslaw Szeremiota via Jim Ferenczi)
-
 Other
 
 * LUCENE-7763: Remove outdated comment in IndexWriterConfig.setIndexSort javadocs.
@@ -170,16 +169,15 @@ API Changes
   instead of once all shard responses are present. (Simon Willnauer,
   Mike McCandless)
 
-* LUCENE-6819: Index-time boosts are deprecated. As a replacement, index-time
-  scoring factors should be indexed into a doc value field and combined at
-  query time using eg. FunctionScoreQuery. (Adrien Grand)
-
 * LUCENE-7700: A cleanup of merge throughput control logic. Refactored all the
   code previously scattered throughout the IndexWriter and 
   ConcurrentMergeScheduler into a more accessible set of public methods (see 
   MergePolicy.OneMergeProgress, MergeScheduler.wrapForMerge and 
   OneMerge.mergeInit). (Dawid Weiss, Mike McCandless).
 
+* LUCENE-7734: FieldType's copy constructor was widened to accept any IndexableFieldType.
+  (David Smiley)
+
 New Features
 
 * LUCENE-7738: Add new InetAddressRange for indexing and querying InetAddress
@@ -343,10 +341,6 @@ Other
 * LUCENE-7658: queryparser/xml CoreParser now implements SpanQueryBuilder interface.
   (Daniel Collins, Christine Poerschke)
 
-* LUCENE-7664: GeoPointField and its queries are deprecated in favor
-  of LatLonPoint, which offers faster indexing and searching
-  performance, smaller index, and less search-time heap usage.  (Mike McCandless)
-
 * LUCENE-7715: NearSpansUnordered simplifications.
   (Paul Elschot via Adrien Grand)
 
@@ -567,11 +561,6 @@ Optimizations
 
 * LUCENE-7572: Doc values queries now cache their hash code. (Adrien Grand)
 
-* LUCENE-7579: Segments are now also sorted during flush, and merging
-  on a sorted index is substantially faster by using some of the same
-  bulk merge optimizations that non-sorted merging uses (Jim Ferenczi
-  via Mike McCandless)
-
 Other
 
 * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
----------------------------------------------------------------------
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index a1390b3..c8a4523 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -929,20 +929,27 @@ public class MemoryIndex {
 
           final int numDimensions = fieldInfo.getPointDimensionCount();
           final int numBytesPerDimension = fieldInfo.getPointNumBytes();
-          minPackedValue = pointValues[0].bytes.clone();
-          maxPackedValue = pointValues[0].bytes.clone();
-
-          for (int i = 0; i < pointValuesCount; i++) {
-            BytesRef pointValue = pointValues[i];
-            assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
-
-            for (int dim = 0; dim < numDimensions; ++dim) {
-              int offset = dim * numBytesPerDimension;
-              if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
-                System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
-              }
-              if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
-                System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
+          if (numDimensions == 1) {
+            // PointInSetQuery.MergePointVisitor expects values to be visited in increasing order,
+            // this is a 1d optimization which has to be done here too. Otherwise we emit values
+            // out of order which causes mismatches.
+            Arrays.sort(pointValues, 0, pointValuesCount);
+            minPackedValue = pointValues[0].bytes.clone();
+            maxPackedValue = pointValues[pointValuesCount - 1].bytes.clone();
+          } else {
+            minPackedValue = pointValues[0].bytes.clone();
+            maxPackedValue = pointValues[0].bytes.clone();
+            for (int i = 0; i < pointValuesCount; i++) {
+              BytesRef pointValue = pointValues[i];
+              assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
+              for (int dim = 0; dim < numDimensions; ++dim) {
+                int offset = dim * numBytesPerDimension;
+                if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
+                  System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
+                }
+                if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
+                  System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
+                }
               }
             }
           }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
----------------------------------------------------------------------
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
index 75344e3..8df1afb 100644
--- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
@@ -476,6 +476,30 @@ public class TestMemoryIndex extends LuceneTestCase {
     assertEquals(1, s.count(DoublePoint.newRangeQuery("doubles", new double[] {10D, 10D}, new double[] {30D, 30D})));
   }
 
+  public void testMultiValuedPointsSortedCorrectly() throws Exception {
+    Document doc = new Document();
+    doc.add(new IntPoint("ints", 3));
+    doc.add(new IntPoint("ints", 2));
+    doc.add(new IntPoint("ints", 1));
+    doc.add(new LongPoint("longs", 3L));
+    doc.add(new LongPoint("longs", 2L));
+    doc.add(new LongPoint("longs", 1L));
+    doc.add(new FloatPoint("floats", 3F));
+    doc.add(new FloatPoint("floats", 2F));
+    doc.add(new FloatPoint("floats", 1F));
+    doc.add(new DoublePoint("doubles", 3D));
+    doc.add(new DoublePoint("doubles", 2D));
+    doc.add(new DoublePoint("doubles", 1D));
+
+    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
+    IndexSearcher s = mi.createSearcher();
+
+    assertEquals(1, s.count(IntPoint.newSetQuery("ints", 2)));
+    assertEquals(1, s.count(LongPoint.newSetQuery("longs", 2)));
+    assertEquals(1, s.count(FloatPoint.newSetQuery("floats", 2)));
+    assertEquals(1, s.count(DoublePoint.newSetQuery("doubles", 2)));
+  }
+
   public void testIndexingPointsAndDocValues() throws Exception {
     FieldType type = new FieldType();
     type.setDimensions(1, 4);

Re: lucene-solr:branch_6_6: LUCENE-7869: Changed MemoryIndex to sort 1d points.

Posted by Martijn v Groningen <ma...@gmail.com>.

Thanks for noticing this, Alan! I've reverted the changes to CHANGES.txt and
then re-added the LUCENE-7869 entry to it, in two separate commits.

On Thu, Jun 8, 2017 at 1:23 PM Martijn v Groningen <
martijn.v.groningen@gmail.com> wrote:

> Yes, this is a mistake. Let me fix this.
>
> On Thu, Jun 8, 2017 at 1:16 PM Alan Woodward <al...@flax.co.uk> wrote:
>
>> There’s a few unrelated edits to CHANGES.txt here which I think might be
>> a mistake?
>>
>> > On 8 Jun 2017, at 11:23, mvg@apache.org wrote:
>> >
>> > Repository: lucene-solr
>> > Updated Branches:
>> >  refs/heads/branch_6_6 0238eeef2 -> 703053094
>> >
>> >
>> > LUCENE-7869: Changed MemoryIndex to sort 1d points.
>> >
>> > In case of 1d points, the PointInSetQuery.MergePointVisitor expects
>> that these points are visited in ascending order.
>> > Prior to this change the memory index doesn't do this and this can
>> result in document with multiple points that should match to not match.
>> >
>> >
>> > Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
>> > Commit:
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/70305309
>> > Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/70305309
>> > Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/70305309
>> >
>> > Branch: refs/heads/branch_6_6
>> > Commit: 70305309471e906c8f64e4203aba475445278938
>> > Parents: 0238eee
>> > Author: Martijn van Groningen <ma...@gmail.com>
>> > Authored: Wed Jun 7 19:55:32 2017 +0200
>> > Committer: Martijn van Groningen <ma...@gmail.com>
>> > Committed: Thu Jun 8 12:15:30 2017 +0200
>> >
>> > ----------------------------------------------------------------------
>> > lucene/CHANGES.txt                              | 29 +++++-----------
>> > .../apache/lucene/index/memory/MemoryIndex.java | 35
>> ++++++++++++--------
>> > .../lucene/index/memory/TestMemoryIndex.java    | 24 ++++++++++++++
>> > 3 files changed, 54 insertions(+), 34 deletions(-)
>> > ----------------------------------------------------------------------
>> >
>> >
>> >
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/CHANGES.txt
>> > ----------------------------------------------------------------------
>> > diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
>> > index f1bddde..1dfebfe 100644
>> > --- a/lucene/CHANGES.txt
>> > +++ b/lucene/CHANGES.txt
>> > @@ -4,7 +4,12 @@ For more information on past and future Lucene
>> versions, please see:
>> > http://s.apache.org/luceneversions
>> >
>> > ======================= Lucene 6.6.1 =======================
>> > -(No Changes)
>> > +
>> > +Bug Fixes
>> > +
>> > +* LUCENE-7869: Changed MemoryIndex to sort 1d points. In case of 1d
>> points, the PointInSetQuery.MergePointVisitor expects
>> > +  that these points are visited in ascending order. The memory index
>> doesn't do this and this can result in document
>> > +  with multiple points that should match to not match. (Martijn van
>> Groningen)
>> >
>> > ======================= Lucene 6.6.0 =======================
>> >
>> > @@ -102,9 +107,6 @@ Other
>> > * LUCENE-7761: Fixed comment in ReqExclScorer.
>> >   (Pablo Pita Leira via Adrien Grand)
>> >
>> > -* LUCENE-7815: Deprecate the PostingsHighlighter. It evolved into the
>> > -  UnifiedHighlighter. (David Smiley)
>> > -
>> > ======================= Lucene 6.5.1 =======================
>> >
>> > Bug Fixes
>> > @@ -118,9 +120,6 @@ Bug Fixes
>> > * LUCENE-7769: The UnifiedHighligter wasn't highlighting portions of
>> the query
>> >   wrapped in BoostQuery or SpanBoostQuery. (David Smiley, Dmitry
>> Malinin)
>> >
>> > -* LUCENE-7791: Fixed index sorting to work with sparse numeric and
>> binary docvalues field.
>> > -  (Przemyslaw Szeremiota via Jim Ferenczi)
>> > -
>> > Other
>> >
>> > * LUCENE-7763: Remove outdated comment in
>> IndexWriterConfig.setIndexSort javadocs.
>> > @@ -170,16 +169,15 @@ API Changes
>> >   instead of once all shard responses are present. (Simon Willnauer,
>> >   Mike McCandless)
>> >
>> > -* LUCENE-6819: Index-time boosts are deprecated. As a replacement,
>> index-time
>> > -  scoring factors should be indexed into a doc value field and
>> combined at
>> > -  query time using eg. FunctionScoreQuery. (Adrien Grand)
>> > -
>> > * LUCENE-7700: A cleanup of merge throughput control logic. Refactored
>> all the
>> >   code previously scattered throughout the IndexWriter and
>> >   ConcurrentMergeScheduler into a more accessible set of public methods
>> (see
>> >   MergePolicy.OneMergeProgress, MergeScheduler.wrapForMerge and
>> >   OneMerge.mergeInit). (Dawid Weiss, Mike McCandless).
>> >
>> > +* LUCENE-7734: FieldType's copy constructor was widened to accept any
>> IndexableFieldType.
>> > +  (David Smiley)
>> > +
>> > New Features
>> >
>> > * LUCENE-7738: Add new InetAddressRange for indexing and querying
>> InetAddress
>> > @@ -343,10 +341,6 @@ Other
>> > * LUCENE-7658: queryparser/xml CoreParser now implements
>> SpanQueryBuilder interface.
>> >   (Daniel Collins, Christine Poerschke)
>> >
>> > -* LUCENE-7664: GeoPointField and its queries are deprecated in favor
>> > -  of LatLonPoint, which offers faster indexing and searching
>> > -  performance, smaller index, and less search-time heap usage.  (Mike
>> McCandless)
>> > -
>> > * LUCENE-7715: NearSpansUnordered simplifications.
>> >   (Paul Elschot via Adrien Grand)
>> >
>> > @@ -567,11 +561,6 @@ Optimizations
>> >
>> > * LUCENE-7572: Doc values queries now cache their hash code. (Adrien
>> Grand)
>> >
>> > -* LUCENE-7579: Segments are now also sorted during flush, and merging
>> > -  on a sorted index is substantially faster by using some of the same
>> > -  bulk merge optimizations that non-sorted merging uses (Jim Ferenczi
>> > -  via Mike McCandless)
>> > -
>> > Other
>> >
>> > * LUCENE-7546: Fixed references to benchmark wikipedia data and the
>> Jenkins line-docs file
>> >
>> >
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
>> > ----------------------------------------------------------------------
>> > diff --git
>> a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
>> b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
>> > index a1390b3..c8a4523 100644
>> > ---
>> a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
>> > +++
>> b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
>> > @@ -929,20 +929,27 @@ public class MemoryIndex {
>> >
>> >           final int numDimensions = fieldInfo.getPointDimensionCount();
>> >           final int numBytesPerDimension = fieldInfo.getPointNumBytes();
>> > -          minPackedValue = pointValues[0].bytes.clone();
>> > -          maxPackedValue = pointValues[0].bytes.clone();
>> > -
>> > -          for (int i = 0; i < pointValuesCount; i++) {
>> > -            BytesRef pointValue = pointValues[i];
>> > -            assert pointValue.bytes.length == pointValue.length :
>> "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take
>> care of this";
>> > -
>> > -            for (int dim = 0; dim < numDimensions; ++dim) {
>> > -              int offset = dim * numBytesPerDimension;
>> > -              if (StringHelper.compare(numBytesPerDimension,
>> pointValue.bytes, offset, minPackedValue, offset) < 0) {
>> > -                System.arraycopy(pointValue.bytes, offset,
>> minPackedValue, offset, numBytesPerDimension);
>> > -              }
>> > -              if (StringHelper.compare(numBytesPerDimension,
>> pointValue.bytes, offset, maxPackedValue, offset) > 0) {
>> > -                System.arraycopy(pointValue.bytes, offset,
>> maxPackedValue, offset, numBytesPerDimension);
>> > +          if (numDimensions == 1) {
>> > +            // PointInSetQuery.MergePointVisitor expects values to be
>> visited in increasing order,
>> > +            // this is a 1d optimization which has to be done here
>> too. Otherwise we emit values
>> > +            // out of order which causes mismatches.
>> > +            Arrays.sort(pointValues, 0, pointValuesCount);
>> > +            minPackedValue = pointValues[0].bytes.clone();
>> > +            maxPackedValue = pointValues[pointValuesCount -
>> 1].bytes.clone();
>> > +          } else {
>> > +            minPackedValue = pointValues[0].bytes.clone();
>> > +            maxPackedValue = pointValues[0].bytes.clone();
>> > +            for (int i = 0; i < pointValuesCount; i++) {
>> > +              BytesRef pointValue = pointValues[i];
>> > +              assert pointValue.bytes.length == pointValue.length :
>> "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take
>> care of this";
>> > +              for (int dim = 0; dim < numDimensions; ++dim) {
>> > +                int offset = dim * numBytesPerDimension;
>> > +                if (StringHelper.compare(numBytesPerDimension,
>> pointValue.bytes, offset, minPackedValue, offset) < 0) {
>> > +                  System.arraycopy(pointValue.bytes, offset,
>> minPackedValue, offset, numBytesPerDimension);
>> > +                }
>> > +                if (StringHelper.compare(numBytesPerDimension,
>> pointValue.bytes, offset, maxPackedValue, offset) > 0) {
>> > +                  System.arraycopy(pointValue.bytes, offset,
>> maxPackedValue, offset, numBytesPerDimension);
>> > +                }
>> >               }
>> >             }
>> >           }
>> >
>> >
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
>> > ----------------------------------------------------------------------
>> > diff --git
>> a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
>> b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
>> > index 75344e3..8df1afb 100644
>> > ---
>> a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
>> > +++
>> b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
>> > @@ -476,6 +476,30 @@ public class TestMemoryIndex extends
>> LuceneTestCase {
>> >     assertEquals(1, s.count(DoublePoint.newRangeQuery("doubles", new
>> double[] {10D, 10D}, new double[] {30D, 30D})));
>> >   }
>> >
>> > +  public void testMultiValuedPointsSortedCorrectly() throws Exception {
>> > +    Document doc = new Document();
>> > +    doc.add(new IntPoint("ints", 3));
>> > +    doc.add(new IntPoint("ints", 2));
>> > +    doc.add(new IntPoint("ints", 1));
>> > +    doc.add(new LongPoint("longs", 3L));
>> > +    doc.add(new LongPoint("longs", 2L));
>> > +    doc.add(new LongPoint("longs", 1L));
>> > +    doc.add(new FloatPoint("floats", 3F));
>> > +    doc.add(new FloatPoint("floats", 2F));
>> > +    doc.add(new FloatPoint("floats", 1F));
>> > +    doc.add(new DoublePoint("doubles", 3D));
>> > +    doc.add(new DoublePoint("doubles", 2D));
>> > +    doc.add(new DoublePoint("doubles", 1D));
>> > +
>> > +    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
>> > +    IndexSearcher s = mi.createSearcher();
>> > +
>> > +    assertEquals(1, s.count(IntPoint.newSetQuery("ints", 2)));
>> > +    assertEquals(1, s.count(LongPoint.newSetQuery("longs", 2)));
>> > +    assertEquals(1, s.count(FloatPoint.newSetQuery("floats", 2)));
>> > +    assertEquals(1, s.count(DoublePoint.newSetQuery("doubles", 2)));
>> > +  }
>> > +
>> >   public void testIndexingPointsAndDocValues() throws Exception {
>> >     FieldType type = new FieldType();
>> >     type.setDimensions(1, 4);
>> >
>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
>> For additional commands, e-mail: dev-help@lucene.apache.org
>>
>>

Re: lucene-solr:branch_6_6: LUCENE-7869: Changed MemoryIndex to sort 1d points.

Posted by Martijn v Groningen <ma...@gmail.com>.

Yes, this is a mistake. Let me fix this.

On Thu, Jun 8, 2017 at 1:16 PM Alan Woodward <al...@flax.co.uk> wrote:

> There’s a few unrelated edits to CHANGES.txt here which I think might be a
> mistake?
>
> > On 8 Jun 2017, at 11:23, mvg@apache.org wrote:
> >
> > Repository: lucene-solr
> > Updated Branches:
> >  refs/heads/branch_6_6 0238eeef2 -> 703053094
> >
> >
> > LUCENE-7869: Changed MemoryIndex to sort 1d points.
> >
> > In case of 1d points, the PointInSetQuery.MergePointVisitor expects that
> these points are visited in ascending order.
> > Prior to this change the memory index doesn't do this and this can
> result in document with multiple points that should match to not match.
> >
> >
> > Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
> > Commit:
> http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/70305309
> > Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/70305309
> > Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/70305309
> >
> > Branch: refs/heads/branch_6_6
> > Commit: 70305309471e906c8f64e4203aba475445278938
> > Parents: 0238eee
> > Author: Martijn van Groningen <ma...@gmail.com>
> > Authored: Wed Jun 7 19:55:32 2017 +0200
> > Committer: Martijn van Groningen <ma...@gmail.com>
> > Committed: Thu Jun 8 12:15:30 2017 +0200
> >
> > ----------------------------------------------------------------------
> > lucene/CHANGES.txt                              | 29 +++++-----------
> > .../apache/lucene/index/memory/MemoryIndex.java | 35 ++++++++++++--------
> > .../lucene/index/memory/TestMemoryIndex.java    | 24 ++++++++++++++
> > 3 files changed, 54 insertions(+), 34 deletions(-)
> > ----------------------------------------------------------------------
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/CHANGES.txt
> > ----------------------------------------------------------------------
> > diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
> > index f1bddde..1dfebfe 100644
> > --- a/lucene/CHANGES.txt
> > +++ b/lucene/CHANGES.txt
> > @@ -4,7 +4,12 @@ For more information on past and future Lucene
> versions, please see:
> > http://s.apache.org/luceneversions
> >
> > ======================= Lucene 6.6.1 =======================
> > -(No Changes)
> > +
> > +Bug Fixes
> > +
> > +* LUCENE-7869: Changed MemoryIndex to sort 1d points. In case of 1d
> points, the PointInSetQuery.MergePointVisitor expects
> > +  that these points are visited in ascending order. The memory index
> doesn't do this and this can result in document
> > +  with multiple points that should match to not match. (Martijn van
> Groningen)
> >
> > ======================= Lucene 6.6.0 =======================
> >
> > @@ -102,9 +107,6 @@ Other
> > * LUCENE-7761: Fixed comment in ReqExclScorer.
> >   (Pablo Pita Leira via Adrien Grand)
> >
> > -* LUCENE-7815: Deprecate the PostingsHighlighter. It evolved into the
> > -  UnifiedHighlighter. (David Smiley)
> > -
> > ======================= Lucene 6.5.1 =======================
> >
> > Bug Fixes
> > @@ -118,9 +120,6 @@ Bug Fixes
> > * LUCENE-7769: The UnifiedHighligter wasn't highlighting portions of the
> query
> >   wrapped in BoostQuery or SpanBoostQuery. (David Smiley, Dmitry Malinin)
> >
> > -* LUCENE-7791: Fixed index sorting to work with sparse numeric and
> binary docvalues field.
> > -  (Przemyslaw Szeremiota via Jim Ferenczi)
> > -
> > Other
> >
> > * LUCENE-7763: Remove outdated comment in IndexWriterConfig.setIndexSort
> javadocs.
> > @@ -170,16 +169,15 @@ API Changes
> >   instead of once all shard responses are present. (Simon Willnauer,
> >   Mike McCandless)
> >
> > -* LUCENE-6819: Index-time boosts are deprecated. As a replacement,
> index-time
> > -  scoring factors should be indexed into a doc value field and combined
> at
> > -  query time using eg. FunctionScoreQuery. (Adrien Grand)
> > -
> > * LUCENE-7700: A cleanup of merge throughput control logic. Refactored
> all the
> >   code previously scattered throughout the IndexWriter and
> >   ConcurrentMergeScheduler into a more accessible set of public methods
> (see
> >   MergePolicy.OneMergeProgress, MergeScheduler.wrapForMerge and
> >   OneMerge.mergeInit). (Dawid Weiss, Mike McCandless).
> >
> > +* LUCENE-7734: FieldType's copy constructor was widened to accept any
> IndexableFieldType.
> > +  (David Smiley)
> > +
> > New Features
> >
> > * LUCENE-7738: Add new InetAddressRange for indexing and querying
> InetAddress
> > @@ -343,10 +341,6 @@ Other
> > * LUCENE-7658: queryparser/xml CoreParser now implements
> SpanQueryBuilder interface.
> >   (Daniel Collins, Christine Poerschke)
> >
> > -* LUCENE-7664: GeoPointField and its queries are deprecated in favor
> > -  of LatLonPoint, which offers faster indexing and searching
> > -  performance, smaller index, and less search-time heap usage.  (Mike
> McCandless)
> > -
> > * LUCENE-7715: NearSpansUnordered simplifications.
> >   (Paul Elschot via Adrien Grand)
> >
> > @@ -567,11 +561,6 @@ Optimizations
> >
> > * LUCENE-7572: Doc values queries now cache their hash code. (Adrien
> Grand)
> >
> > -* LUCENE-7579: Segments are now also sorted during flush, and merging
> > -  on a sorted index is substantially faster by using some of the same
> > -  bulk merge optimizations that non-sorted merging uses (Jim Ferenczi
> > -  via Mike McCandless)
> > -
> > Other
> >
> > * LUCENE-7546: Fixed references to benchmark wikipedia data and the
> Jenkins line-docs file
> >
> >
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> > ----------------------------------------------------------------------
> > diff --git
> a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> > index a1390b3..c8a4523 100644
> > ---
> a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> > +++
> b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> > @@ -929,20 +929,27 @@ public class MemoryIndex {
> >
> >           final int numDimensions = fieldInfo.getPointDimensionCount();
> >           final int numBytesPerDimension = fieldInfo.getPointNumBytes();
> > -          minPackedValue = pointValues[0].bytes.clone();
> > -          maxPackedValue = pointValues[0].bytes.clone();
> > -
> > -          for (int i = 0; i < pointValuesCount; i++) {
> > -            BytesRef pointValue = pointValues[i];
> > -            assert pointValue.bytes.length == pointValue.length :
> "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take
> care of this";
> > -
> > -            for (int dim = 0; dim < numDimensions; ++dim) {
> > -              int offset = dim * numBytesPerDimension;
> > -              if (StringHelper.compare(numBytesPerDimension,
> pointValue.bytes, offset, minPackedValue, offset) < 0) {
> > -                System.arraycopy(pointValue.bytes, offset,
> minPackedValue, offset, numBytesPerDimension);
> > -              }
> > -              if (StringHelper.compare(numBytesPerDimension,
> pointValue.bytes, offset, maxPackedValue, offset) > 0) {
> > -                System.arraycopy(pointValue.bytes, offset,
> maxPackedValue, offset, numBytesPerDimension);
> > +          if (numDimensions == 1) {
> > +            // PointInSetQuery.MergePointVisitor expects values to be
> visited in increasing order,
> > +            // this is a 1d optimization which has to be done here too.
> Otherwise we emit values
> > +            // out of order which causes mismatches.
> > +            Arrays.sort(pointValues, 0, pointValuesCount);
> > +            minPackedValue = pointValues[0].bytes.clone();
> > +            maxPackedValue = pointValues[pointValuesCount -
> 1].bytes.clone();
> > +          } else {
> > +            minPackedValue = pointValues[0].bytes.clone();
> > +            maxPackedValue = pointValues[0].bytes.clone();
> > +            for (int i = 0; i < pointValuesCount; i++) {
> > +              BytesRef pointValue = pointValues[i];
> > +              assert pointValue.bytes.length == pointValue.length :
> "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take
> care of this";
> > +              for (int dim = 0; dim < numDimensions; ++dim) {
> > +                int offset = dim * numBytesPerDimension;
> > +                if (StringHelper.compare(numBytesPerDimension,
> pointValue.bytes, offset, minPackedValue, offset) < 0) {
> > +                  System.arraycopy(pointValue.bytes, offset,
> minPackedValue, offset, numBytesPerDimension);
> > +                }
> > +                if (StringHelper.compare(numBytesPerDimension,
> pointValue.bytes, offset, maxPackedValue, offset) > 0) {
> > +                  System.arraycopy(pointValue.bytes, offset,
> maxPackedValue, offset, numBytesPerDimension);
> > +                }
> >               }
> >             }
> >           }
> >
> >
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> > ----------------------------------------------------------------------
> > diff --git
> a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> > index 75344e3..8df1afb 100644
> > ---
> a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> > +++
> b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> > @@ -476,6 +476,30 @@ public class TestMemoryIndex extends LuceneTestCase
> {
> >     assertEquals(1, s.count(DoublePoint.newRangeQuery("doubles", new
> double[] {10D, 10D}, new double[] {30D, 30D})));
> >   }
> >
> > +  public void testMultiValuedPointsSortedCorrectly() throws Exception {
> > +    Document doc = new Document();
> > +    doc.add(new IntPoint("ints", 3));
> > +    doc.add(new IntPoint("ints", 2));
> > +    doc.add(new IntPoint("ints", 1));
> > +    doc.add(new LongPoint("longs", 3L));
> > +    doc.add(new LongPoint("longs", 2L));
> > +    doc.add(new LongPoint("longs", 1L));
> > +    doc.add(new FloatPoint("floats", 3F));
> > +    doc.add(new FloatPoint("floats", 2F));
> > +    doc.add(new FloatPoint("floats", 1F));
> > +    doc.add(new DoublePoint("doubles", 3D));
> > +    doc.add(new DoublePoint("doubles", 2D));
> > +    doc.add(new DoublePoint("doubles", 1D));
> > +
> > +    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
> > +    IndexSearcher s = mi.createSearcher();
> > +
> > +    assertEquals(1, s.count(IntPoint.newSetQuery("ints", 2)));
> > +    assertEquals(1, s.count(LongPoint.newSetQuery("longs", 2)));
> > +    assertEquals(1, s.count(FloatPoint.newSetQuery("floats", 2)));
> > +    assertEquals(1, s.count(DoublePoint.newSetQuery("doubles", 2)));
> > +  }
> > +
> >   public void testIndexingPointsAndDocValues() throws Exception {
> >     FieldType type = new FieldType();
> >     type.setDimensions(1, 4);
> >
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
> For additional commands, e-mail: dev-help@lucene.apache.org
>
>

Re: lucene-solr:branch_6_6: LUCENE-7869: Changed MemoryIndex to sort 1d points.

Posted by Alan Woodward <al...@flax.co.uk>.

There are a few unrelated edits to CHANGES.txt here, which I think might be a mistake?

> On 8 Jun 2017, at 11:23, mvg@apache.org wrote:
> 
> Repository: lucene-solr
> Updated Branches:
>  refs/heads/branch_6_6 0238eeef2 -> 703053094
> 
> 
> LUCENE-7869: Changed MemoryIndex to sort 1d points.
> 
> In case of 1d points, the PointInSetQuery.MergePointVisitor expects that these points are visited in ascending order.
> Prior to this change the memory index doesn't do this and this can result in document with multiple points that should match to not match.
> 
> 
> Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
> Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/70305309
> Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/70305309
> Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/70305309
> 
> Branch: refs/heads/branch_6_6
> Commit: 70305309471e906c8f64e4203aba475445278938
> Parents: 0238eee
> Author: Martijn van Groningen <ma...@gmail.com>
> Authored: Wed Jun 7 19:55:32 2017 +0200
> Committer: Martijn van Groningen <ma...@gmail.com>
> Committed: Thu Jun 8 12:15:30 2017 +0200
> 
> ----------------------------------------------------------------------
> lucene/CHANGES.txt                              | 29 +++++-----------
> .../apache/lucene/index/memory/MemoryIndex.java | 35 ++++++++++++--------
> .../lucene/index/memory/TestMemoryIndex.java    | 24 ++++++++++++++
> 3 files changed, 54 insertions(+), 34 deletions(-)
> ----------------------------------------------------------------------
> 
> 
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/CHANGES.txt
> ----------------------------------------------------------------------
> diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
> index f1bddde..1dfebfe 100644
> --- a/lucene/CHANGES.txt
> +++ b/lucene/CHANGES.txt
> @@ -4,7 +4,12 @@ For more information on past and future Lucene versions, please see:
> http://s.apache.org/luceneversions
> 
> ======================= Lucene 6.6.1 =======================
> -(No Changes)
> +
> +Bug Fixes
> +
> +* LUCENE-7869: Changed MemoryIndex to sort 1d points. In case of 1d points, the PointInSetQuery.MergePointVisitor expects
> +  that these points are visited in ascending order. The memory index doesn't do this and this can result in document
> +  with multiple points that should match to not match. (Martijn van Groningen)
> 
> ======================= Lucene 6.6.0 =======================
> 
> @@ -102,9 +107,6 @@ Other
> * LUCENE-7761: Fixed comment in ReqExclScorer.
>   (Pablo Pita Leira via Adrien Grand)
> 
> -* LUCENE-7815: Deprecate the PostingsHighlighter. It evolved into the
> -  UnifiedHighlighter. (David Smiley)
> -
> ======================= Lucene 6.5.1 =======================
> 
> Bug Fixes
> @@ -118,9 +120,6 @@ Bug Fixes
> * LUCENE-7769: The UnifiedHighligter wasn't highlighting portions of the query
>   wrapped in BoostQuery or SpanBoostQuery. (David Smiley, Dmitry Malinin)
> 
> -* LUCENE-7791: Fixed index sorting to work with sparse numeric and binary docvalues field.
> -  (Przemyslaw Szeremiota via Jim Ferenczi)
> -
> Other
> 
> * LUCENE-7763: Remove outdated comment in IndexWriterConfig.setIndexSort javadocs.
> @@ -170,16 +169,15 @@ API Changes
>   instead of once all shard responses are present. (Simon Willnauer,
>   Mike McCandless)
> 
> -* LUCENE-6819: Index-time boosts are deprecated. As a replacement, index-time
> -  scoring factors should be indexed into a doc value field and combined at
> -  query time using eg. FunctionScoreQuery. (Adrien Grand)
> -
> * LUCENE-7700: A cleanup of merge throughput control logic. Refactored all the
>   code previously scattered throughout the IndexWriter and 
>   ConcurrentMergeScheduler into a more accessible set of public methods (see 
>   MergePolicy.OneMergeProgress, MergeScheduler.wrapForMerge and 
>   OneMerge.mergeInit). (Dawid Weiss, Mike McCandless).
> 
> +* LUCENE-7734: FieldType's copy constructor was widened to accept any IndexableFieldType.
> +  (David Smiley)
> +
> New Features
> 
> * LUCENE-7738: Add new InetAddressRange for indexing and querying InetAddress
> @@ -343,10 +341,6 @@ Other
> * LUCENE-7658: queryparser/xml CoreParser now implements SpanQueryBuilder interface.
>   (Daniel Collins, Christine Poerschke)
> 
> -* LUCENE-7664: GeoPointField and its queries are deprecated in favor
> -  of LatLonPoint, which offers faster indexing and searching
> -  performance, smaller index, and less search-time heap usage.  (Mike McCandless)
> -
> * LUCENE-7715: NearSpansUnordered simplifications.
>   (Paul Elschot via Adrien Grand)
> 
> @@ -567,11 +561,6 @@ Optimizations
> 
> * LUCENE-7572: Doc values queries now cache their hash code. (Adrien Grand)
> 
> -* LUCENE-7579: Segments are now also sorted during flush, and merging
> -  on a sorted index is substantially faster by using some of the same
> -  bulk merge optimizations that non-sorted merging uses (Jim Ferenczi
> -  via Mike McCandless)
> -
> Other
> 
> * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
> 
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> ----------------------------------------------------------------------
> diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> index a1390b3..c8a4523 100644
> --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
> @@ -929,20 +929,27 @@ public class MemoryIndex {
> 
>           final int numDimensions = fieldInfo.getPointDimensionCount();
>           final int numBytesPerDimension = fieldInfo.getPointNumBytes();
> -          minPackedValue = pointValues[0].bytes.clone();
> -          maxPackedValue = pointValues[0].bytes.clone();
> -
> -          for (int i = 0; i < pointValuesCount; i++) {
> -            BytesRef pointValue = pointValues[i];
> -            assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
> -
> -            for (int dim = 0; dim < numDimensions; ++dim) {
> -              int offset = dim * numBytesPerDimension;
> -              if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
> -                System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
> -              }
> -              if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
> -                System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
> +          if (numDimensions == 1) {
> +            // PointInSetQuery.MergePointVisitor expects values to be visited in increasing order,
> +            // this is a 1d optimization which has to be done here too. Otherwise we emit values
> +            // out of order which causes mismatches.
> +            Arrays.sort(pointValues, 0, pointValuesCount);
> +            minPackedValue = pointValues[0].bytes.clone();
> +            maxPackedValue = pointValues[pointValuesCount - 1].bytes.clone();
> +          } else {
> +            minPackedValue = pointValues[0].bytes.clone();
> +            maxPackedValue = pointValues[0].bytes.clone();
> +            for (int i = 0; i < pointValuesCount; i++) {
> +              BytesRef pointValue = pointValues[i];
> +              assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
> +              for (int dim = 0; dim < numDimensions; ++dim) {
> +                int offset = dim * numBytesPerDimension;
> +                if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
> +                  System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
> +                }
> +                if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
> +                  System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
> +                }
>               }
>             }
>           }
> 
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/70305309/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> ----------------------------------------------------------------------
> diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> index 75344e3..8df1afb 100644
> --- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
> @@ -476,6 +476,30 @@ public class TestMemoryIndex extends LuceneTestCase {
>     assertEquals(1, s.count(DoublePoint.newRangeQuery("doubles", new double[] {10D, 10D}, new double[] {30D, 30D})));
>   }
> 
> +  public void testMultiValuedPointsSortedCorrectly() throws Exception {
> +    Document doc = new Document();
> +    doc.add(new IntPoint("ints", 3));
> +    doc.add(new IntPoint("ints", 2));
> +    doc.add(new IntPoint("ints", 1));
> +    doc.add(new LongPoint("longs", 3L));
> +    doc.add(new LongPoint("longs", 2L));
> +    doc.add(new LongPoint("longs", 1L));
> +    doc.add(new FloatPoint("floats", 3F));
> +    doc.add(new FloatPoint("floats", 2F));
> +    doc.add(new FloatPoint("floats", 1F));
> +    doc.add(new DoublePoint("doubles", 3D));
> +    doc.add(new DoublePoint("doubles", 2D));
> +    doc.add(new DoublePoint("doubles", 1D));
> +
> +    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
> +    IndexSearcher s = mi.createSearcher();
> +
> +    assertEquals(1, s.count(IntPoint.newSetQuery("ints", 2)));
> +    assertEquals(1, s.count(LongPoint.newSetQuery("longs", 2)));
> +    assertEquals(1, s.count(FloatPoint.newSetQuery("floats", 2)));
> +    assertEquals(1, s.count(DoublePoint.newSetQuery("doubles", 2)));
> +  }
> +
>   public void testIndexingPointsAndDocValues() throws Exception {
>     FieldType type = new FieldType();
>     type.setDimensions(1, 4);
> 


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org