You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2019/04/18 09:36:26 UTC

[lucene-solr] branch branch_8x updated (0d95f92 -> 57eb779)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a change to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


    from 0d95f92  LUCENE-2562: Don't randomly enable term vectors on fields.
     new c1b71e6  Fix OneDimensionBKDWriter valueCount validation
     new 57eb779  LUCENE-7386: Flatten nested disjunctions.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 lucene/CHANGES.txt                                 |  7 ++
 .../org/apache/lucene/search/BooleanQuery.java     | 40 +++++++++-
 .../java/org/apache/lucene/util/bkd/BKDWriter.java |  4 +-
 .../apache/lucene/search/TestBooleanRewrites.java  | 64 +++++++++++++++
 .../test/org/apache/lucene/util/bkd/TestBKD.java   | 91 ++++++++++++++++++++++
 5 files changed, 203 insertions(+), 3 deletions(-)


[lucene-solr] 01/02: Fix OneDimensionBKDWriter valueCount validation

Posted by jp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit c1b71e6cb670a34710e3c3e900ca8aa4fd55edd7
Author: Zhao Yang <zh...@gmail.com>
AuthorDate: Tue Apr 16 09:32:40 2019 +0800

    Fix OneDimensionBKDWriter valueCount validation
    
    Signed-off-by: Adrien Grand <jp...@gmail.com>
---
 lucene/CHANGES.txt                                 |  3 +
 .../java/org/apache/lucene/util/bkd/BKDWriter.java |  4 +-
 .../test/org/apache/lucene/util/bkd/TestBKD.java   | 91 ++++++++++++++++++++++
 3 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ddd7e1f..e60ab1c 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -70,6 +70,9 @@ Bug fixes
 * LUCENE-8754: Fix ConcurrentModificationException in SegmentInfo if
   attributes are accessed in MergePolicy while the merge is running (Simon Willnauer)
 
+* LUCENE-8765: Fixed validation of the number of added points in KD trees.
+  (Zhao Yang via Adrien Grand)
+
 Improvements
 
 * LUCENE-8673: Use radix partitioning when merging dimensional points instead
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 3073728..3fa3eb6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -553,8 +553,8 @@ public class BKDWriter implements Closeable {
       docsSeen.set(docID);
       leafCount++;
 
-      if (valueCount > totalPointCount) {
-        throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
+      if (valueCount + leafCount > totalPointCount) {
+        throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + leafCount + " values");
       }
 
       if (leafCount == maxPointsInLeafNode) {
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index b651b53..282add9 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
 import java.util.BitSet;
 import java.util.List;
 
+import org.apache.lucene.codecs.MutablePointValues;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
@@ -1245,4 +1246,94 @@ public class TestBKD extends LuceneTestCase {
     pointsIn.close();
     dir.close();
   }
+
+  public void testTotalPointCountValidation() throws IOException {
+    Directory dir = newDirectory();
+    final int numValues = 10;
+    final int numPointsAdded = 50; // exceeds totalPointCount
+    final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
+    final byte[] pointValue = new byte[numBytesPerDim];
+    random().nextBytes(pointValue);
+
+    MutablePointValues reader = new MutablePointValues() {
+
+      @Override
+      public void intersect(IntersectVisitor visitor) throws IOException {
+        for(int i=0;i<numPointsAdded;i++) {
+          visitor.visit(0, pointValue);
+        }
+      }
+
+      @Override
+      public long estimatePointCount(IntersectVisitor visitor) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public byte[] getMinPackedValue() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public byte[] getMaxPackedValue() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int getNumDataDimensions() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int getNumIndexDimensions() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int getBytesPerDimension() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public long size() {
+        return numPointsAdded;
+      }
+
+      @Override
+      public int getDocCount() {
+        return numPointsAdded;
+      }
+
+      @Override
+      public void swap(int i, int j) {
+        // do nothing
+      }
+
+      @Override
+      public int getDocID(int i) {
+        return 0;
+      }
+
+      @Override
+      public void getValue(int i, BytesRef packedValue) {
+        packedValue.bytes = pointValue;
+      }
+
+      @Override
+      public byte getByteAt(int i, int k) {
+        throw new UnsupportedOperationException();
+      }
+    };
+
+    BKDWriter w = new BKDWriter(numValues, dir, "_temp", 1, 1, numBytesPerDim, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
+        BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
+    expectThrows(IllegalStateException.class, () -> {
+      try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
+        w.writeField(out, "test_field_name", reader);
+      } finally {
+        w.close();
+        dir.close();
+      }
+    });
+  }
 }


[lucene-solr] 02/02: LUCENE-7386: Flatten nested disjunctions.

Posted by jp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 57eb7797a874a62476dff3ed0c24d5fd2827649f
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Apr 18 11:24:50 2019 +0200

    LUCENE-7386: Flatten nested disjunctions.
---
 lucene/CHANGES.txt                                 |  4 ++
 .../org/apache/lucene/search/BooleanQuery.java     | 40 +++++++++++++-
 .../apache/lucene/search/TestBooleanRewrites.java  | 64 ++++++++++++++++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e60ab1c..2e12a17 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -125,6 +125,10 @@ Changes in Runtime Behavior
   the only difference in output between the two is in the position length
   attribute.  (Alan Woodward, Jim Ferenczi)
 
+* LUCENE-7386: Disjunctions nested in disjunctions are now flattened. This might
+  trigger changes in the produced scores due to changes to the order in which
+  scores of sub clauses are summed up. (Adrien Grand)
+
 Other
 
 * LUCENE-8680: Refactor EdgeTree#relateTriangle method. (Ignacio Vera)
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index 87105b0..ff264e7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -173,6 +173,15 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
     return clauseSets.get(occur);
   }
 
+  /**
+   * Whether this query is a pure disjunction, ie. it only has SHOULD clauses
+   * and it is enough for a single clause to match for this boolean query to match.
+   */
+  boolean isPureDisjunction() {
+    return clauses.size() == getClauses(Occur.SHOULD).size()
+        && minimumNumberShouldMatch <= 1;
+  }
+
   /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to
    * make it possible to do:
    * <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre>
@@ -245,9 +254,13 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
         Query query = clause.getQuery();
         Query rewritten = query.rewrite(reader);
         if (rewritten != query) {
+          // rewrite clause
           actuallyRewritten = true;
+          builder.add(rewritten, clause.getOccur());
+        } else {
+          // leave as-is
+          builder.add(clause);
         }
-        builder.add(rewritten, clause.getOccur());
       }
       if (actuallyRewritten) {
         return builder.build();
@@ -448,6 +461,31 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
       }
     }
 
+    // Flatten nested disjunctions, this is important for block-max WAND to perform well
+    if (minimumNumberShouldMatch <= 1) {
+      BooleanQuery.Builder builder = new BooleanQuery.Builder();
+      builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
+      boolean actuallyRewritten = false;
+      for (BooleanClause clause : clauses) {
+        if (clause.getOccur() == Occur.SHOULD && clause.getQuery() instanceof BooleanQuery) {
+          BooleanQuery innerQuery = (BooleanQuery) clause.getQuery();
+          if (innerQuery.isPureDisjunction()) {
+            actuallyRewritten = true;
+            for (BooleanClause innerClause : innerQuery.clauses()) {
+              builder.add(innerClause);
+            }
+          } else {
+            builder.add(clause);
+          }
+        } else {
+          builder.add(clause);
+        }
+      }
+      if (actuallyRewritten) {
+        return builder.build();
+      }
+    }
+
     return super.rewrite(reader);
   }
 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
index 56c5f8a..2120c51 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
@@ -480,4 +480,68 @@ public class TestBooleanRewrites extends LuceneTestCase {
         .build();
     assertEquals(expected, searcher.rewrite(query));
   }
+
+  public void testFlattenInnerDisjunctions() throws IOException {
+    IndexSearcher searcher = newSearcher(new MultiReader());
+
+    Query inner = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
+        .build();
+    Query query = new BooleanQuery.Builder()
+        .add(inner, Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
+        .build();
+    Query expectedRewritten = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
+        .build();
+    assertEquals(expectedRewritten, searcher.rewrite(query));
+
+    query = new BooleanQuery.Builder()
+        .setMinimumNumberShouldMatch(0)
+        .add(inner, Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
+        .build();
+    expectedRewritten = new BooleanQuery.Builder()
+        .setMinimumNumberShouldMatch(0)
+        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
+        .build();
+    assertEquals(expectedRewritten, searcher.rewrite(query));
+
+    query = new BooleanQuery.Builder()
+        .setMinimumNumberShouldMatch(1)
+        .add(inner, Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
+        .build();
+    expectedRewritten = new BooleanQuery.Builder()
+        .setMinimumNumberShouldMatch(1)
+        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
+        .build();
+    assertEquals(expectedRewritten, searcher.rewrite(query));
+
+    query = new BooleanQuery.Builder()
+        .setMinimumNumberShouldMatch(2)
+        .add(inner, Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
+        .build();
+    assertSame(query, searcher.rewrite(query));
+
+    inner = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
+        .setMinimumNumberShouldMatch(2)
+        .build();
+    query = new BooleanQuery.Builder()
+        .add(inner, Occur.SHOULD)
+        .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
+        .build();
+    assertSame(query, searcher.rewrite(query));
+  }
 }